import fcntl
import logging
import os
import shutil
from typing import TYPE_CHECKING

from dataiku.core.locking import (
    acquire_lockfile,
    try_acquire_lockfile,
    release_lockfile
)
from dataiku.core.vector_stores.lifecycle.base import (
    get_folder_path_to_load_kb,
    download_to_folder,
    create_langchain_vector_store
)

if TYPE_CHECKING:
    from langchain_core.vectorstores import VectorStore

logger = logging.getLogger(__name__)


def load_into_shared_folder(project_key: str, kb_id: str, version: str) -> 'VectorStoreSharedFolder':
    """
    Makes sure the files of a knowledge bank version are available on disk
    and returns a handle on the folder holding them.

    The files are downloaded only when the folder does not exist yet;
    an existing folder is reused as-is.
    For local vector stores, downloads metadata files as well as data files.
    For remote vector stores, only downloads metadata files.

    .. note::
        The folder is shared between concurrent processes.
        Only one instance of the shared folder is allowed by process.

    :param str project_key: The knowledge bank project key.
    :param str kb_id: The knowledge bank identifier.
    :param str version: The knowledge bank version.
    :returns: A handle on the shared folder, carrying the shared lock
        requested on its ``.shared-flock`` file.
    :rtype: :class:`VectorStoreSharedFolder`
    """
    kb_root = get_folder_path_to_load_kb()
    # deterministic name, so that every process resolves the same folder
    shared_name = "{}-{}-{}".format(project_key, kb_id, version)
    target_path = os.path.join(kb_root, shared_name)

    # first lock file: serializes calls to the filesystem
    fs_lock_path = _get_lockfile_path(kb_root, shared_name)
    # second lock file: guards the folder against removals
    usage_lock_path = _get_shared_lockfile_path(kb_root, shared_name)

    with acquire_lockfile(fs_lock_path):
        if not os.path.exists(target_path):
            logger.debug("load shared folder {}".format(target_path))
            download_to_folder(
                project_key, kb_id, version, target_path,
                use_latest_settings=False
            )
        else:
            logger.debug("reuse shared folder {}".format(target_path))

        # requested while still holding the filesystem lock; the second
        # element of the result is deliberately not inspected
        shared_fd, _ = try_acquire_lockfile(usage_lock_path, operation=fcntl.LOCK_SH)

    return VectorStoreSharedFolder(kb_root, shared_name, shared_fd)


class VectorStoreSharedFolder:
    """
    Handle on an on-disk knowledge bank folder that may be used by several
    processes at once.

    The handle keeps the lock handle it was given on the folder's
    ``.shared-flock`` file, plus a flag telling whether the folder still
    holds the latest version (true until :meth:`set_outdated` is called).
    """

    def __init__(self, root_folder_path: str, folder_name: str, shared_fd):
        """
        :param str root_folder_path: The directory containing the shared folder.
        :param str folder_name: The name of the shared folder inside the root.
        :param shared_fd: The handle of the shared lock taken on the folder.
        """
        self._root_folder_path = root_folder_path
        self._folder_name = folder_name
        self._shared_fd = shared_fd
        # a freshly loaded folder is considered the latest version
        self._is_latest = True

    def __repr__(self):
        class_name = self.__class__.__name__
        return "{}(path={})".format(class_name, self.folder_path)

    @property
    def folder_path(self) -> str:
        """The full path of the shared folder."""
        root, name = self._root_folder_path, self._folder_name
        return os.path.join(root, name)

    def set_outdated(self):
        """Flags this folder as no longer holding the latest version."""
        self._is_latest = False

    def remove_unless_used(self):
        """
        Hands the folder over to ``_remove_unless_used`` together with the
        shared lock handle and the latest-version flag.

        Best effort: any failure is logged as a warning, never raised.
        """
        try:
            _remove_unless_used(
                self._root_folder_path,
                self._folder_name,
                shared_fd=self._shared_fd,
                is_latest=self._is_latest,
            )
        except Exception as error:
            message = "Could not remove shared folder {}: {}".format(
                self.folder_path, error)
            logger.warning(message)

    def create_langchain_vectorstore(self, **vectorstore_kwargs) -> 'VectorStore':
        """
        Builds a Langchain Vectorstore object on top of this folder.

        :param vectorstore_kwargs: Passed through unchanged to
            ``create_langchain_vector_store``.
        :rtype: :class:`langchain_core.vectorstores.VectorStore`
        """
        # building the vector store may apply migrations on the files on
        # disk, so the creation happens under the folder's file lock
        creation_lock_path = _get_lockfile_path(
            self._root_folder_path, self._folder_name)

        with acquire_lockfile(creation_lock_path):
            return create_langchain_vector_store(
                self.folder_path, **vectorstore_kwargs)


def remove_unused_versions_except_latest(project_key: str, knowledge_bank_id: str):
    """
    Removes from disk the shared folders of all versions of a knowledge bank
    except the latest one, leaving alone the folders that are still in use.

    Versions are discovered through the ``<project_key>-<kb_id>-<version>.flock``
    files found in the root folder, and ordered with ``_as_comparable``.
    Lock files whose version part is not recognized (for instance files that
    belong to another knowledge bank whose identifier merely starts with the
    same prefix) are ignored instead of aborting the whole cleanup.

    :param str project_key: The knowledge bank project key.
    :param str knowledge_bank_id: The knowledge bank identifier.
    """
    root_folder_path = get_folder_path_to_load_kb()
    prefix = "{}-{}-".format(project_key, knowledge_bank_id)
    suffix = ".flock"

    # (sort key, folder name) for each lock file with a recognizable version
    candidates = []
    for file_name in os.listdir(root_folder_path):
        if not (file_name.startswith(prefix) and file_name.endswith(suffix)):
            continue

        folder_name = file_name[:-len(suffix)]
        version = folder_name[len(prefix):]
        try:
            sort_key = _as_comparable(version)
        except ValueError:
            # neither a known label nor an integer: not one of our versions
            logger.debug("ignore lock file {} with unrecognized version {!r}".format(
                file_name, version))
            continue

        candidates.append((sort_key, folder_name))

    if len(candidates) <= 1:
        return  # no more than one version, abort

    # stable sort on the version rank only, then drop the latest version
    candidates.sort(key=lambda candidate: candidate[0])
    candidates.pop()

    for _, folder_name in candidates:
        _remove_unless_used(root_folder_path, folder_name)


def _as_comparable(version: str) -> int:
    """
    Converts a knowledge bank version into an integer usable as a sort key.

    The special labels rank below every numbered version, in the order
    ``not-built`` < ``not-versioned`` < ``single-version``.

    :param str version: A special label, or the decimal form of an integer.
    :returns: The rank of the version.
    :rtype: int
    :raises ValueError: If the version is neither a special label nor
        parseable by :func:`int`.
    """
    special_ranks = {
        "not-built": -3,
        "not-versioned": -2,
        "single-version": -1,
    }

    rank = special_ranks.get(version)
    if rank is not None:
        return rank

    # current: version is a positive integer
    return int(version)


def _remove_unless_used(root_folder_path: str, folder_name: str, shared_fd=None, is_latest=False):
    """
    Releases the caller's hold on a shared folder, then deletes the folder
    from disk unless it has to be kept.

    Everything happens while holding ``<folder_name>.flock``. Nothing is done
    at all (not even releasing ``shared_fd``) when that lock file is missing.

    The folder is kept when ``is_latest`` is true, or when the lock on
    ``<folder_name>.shared-flock`` cannot be acquired. Otherwise the
    ``.shared-flock`` file is deleted, then the folder itself; the ``.flock``
    file is left in place.

    :param str root_folder_path: The directory containing the shared folder
        and its lock files.
    :param str folder_name: The name of the shared folder, also the stem of
        both lock file names.
    :param shared_fd: The handle of a lock held on the ``.shared-flock`` file;
        released first when truthy.
    :param bool is_latest: Whether the folder must stay on disk once
        ``shared_fd`` has been released.
    """
    folder_path = os.path.join(root_folder_path, folder_name)
    lockfile_path = _get_lockfile_path(root_folder_path, folder_name)
    shared_lockfile_path = _get_shared_lockfile_path(root_folder_path, folder_name)

    if not os.path.exists(lockfile_path):
        return  # should never happen, except in unit tests

    with acquire_lockfile(lockfile_path):
        # give up our own hold first, otherwise the acquisition attempted
        # below could be blocked by the lock we are still holding
        if shared_fd:
            release_lockfile(shared_lockfile_path, shared_fd)

        if is_latest:
            # multiprocess case: keep the latest version on-disk to skip
            # file downloads if the next call needs this folder
            return

        if os.path.exists(shared_lockfile_path):
            # no explicit operation is passed here, unlike the LOCK_SH request
            # made when loading the folder; presumably this is an exclusive,
            # non-blocking attempt -- confirm against dataiku.core.locking
            fd, acquired = try_acquire_lockfile(shared_lockfile_path)
            if not acquired:
                return  # another process needs this folder

            # we are alone here, just release the file
            release_lockfile(shared_lockfile_path, fd)
            os.remove(shared_lockfile_path)

        # finally, remove the folder itself
        if os.path.exists(folder_path):
            logger.info("remove shared folder {}".format(folder_path))
            # errors raised while deleting are ignored (best effort)
            shutil.rmtree(folder_path, ignore_errors=True)


# <root_folder_path>/<folder_name>.flock
def _get_lockfile_path(root_folder_path: str, folder_name: str) -> str:
    """
    Returns the path of the ``.flock`` lock file of a shared folder.

    :param str root_folder_path: The directory containing the shared folder.
    :param str folder_name: The name of the shared folder.
    :returns: ``<root_folder_path>/<folder_name>.flock``
    :rtype: str
    """
    return os.path.join(root_folder_path, "{}.flock".format(folder_name))


# <root_folder_path>/<folder_name>.shared-flock
def _get_shared_lockfile_path(root_folder_path: str, folder_name: str) -> str:
    """
    Returns the path of the ``.shared-flock`` lock file of a shared folder.

    :param str root_folder_path: The directory containing the shared folder.
    :param str folder_name: The name of the shared folder.
    :returns: ``<root_folder_path>/<folder_name>.shared-flock``
    :rtype: str
    """
    return os.path.join(root_folder_path, "{}.shared-flock".format(folder_name))
