import os
from collections.abc import Callable
from typing import Any, Optional

from langchain_core.vectorstores import VectorStore
from pinecone import Pinecone, ServerlessSpec
from typing_extensions import override

from dataiku.base.utils import package_is_at_least_no_import
from dataiku.core.vector_stores.dku_vector_store import DkuRemoteVectorStore, logger
from dataiku.core.vector_stores.vector_store_document_filter import MongoDBLikeVectorStoreDocumentFilter
from dataiku.llm.types import RetrievableKnowledge
from dataikuapi.dss.admin import DSSConnection, DSSConnectionInfo
from dataikuapi.dss.langchain import DKUEmbeddings

try:
    from langchain_pinecone import PineconeVectorStore as LangchainPinecone  # type: ignore
except ImportError:
    from langchain_community.vectorstores import Pinecone as LangchainPinecone  # type: ignore



class PineconeV3VectorStore(DkuRemoteVectorStore):
    """Vector store backed by an existing Pinecone index.

    The index name is taken from the knowledge bank's ``pineconeIndexName`` setting.
    Indexes are never created by :meth:`get_db`; the only creation path is the opt-in
    ``pinecone.custom.auto_create_index`` connection property handled in
    :meth:`init_connection`, which is meant for local testing.
    """

    DEFAULT_NAMESPACE = ""

    # Settings used only when an index is auto-created by init_connection().
    # NOTE(review): 1536 presumably matches the embedding size used in local tests - confirm.
    AUTO_CREATE_INDEX_DIMENSION = 1536
    AUTO_CREATE_INDEX_METRIC = "cosine"
    AUTO_CREATE_INDEX_CLOUD = "aws"
    AUTO_CREATE_INDEX_REGION = "us-east-1"

    # String values of a connection property that mean "disabled".
    _FALSE_PROPERTY_VALUES = frozenset({"", "false", "0", "no", "off"})

    def __init__(self, kb: RetrievableKnowledge, exec_folder: str, connection_info_retriever: Callable[[str], DSSConnectionInfo]):
        """Record the Pinecone index name from ``kb`` and delegate the rest to the base class.

        :param kb: knowledge bank settings; must contain ``pineconeIndexName``. It is mutated:
            ``resolvedIndexName`` is set to the same value.
        :param exec_folder: forwarded unchanged to the base class.
        :param connection_info_retriever: callable resolving a connection name to its info,
            forwarded to the base class.
        """
        self.pinecone_connection_info: Optional[DSSConnectionInfo] = None
        self.pinecone_client: Optional[Pinecone] = None
        kb["resolvedIndexName"] = kb["pineconeIndexName"]  # TODO @rag Standardise pinecone to work the same as ElasticSearch with index name resolution
        # These attributes are assigned before the base constructor runs.
        # NOTE(review): presumably because the base constructor may call init_connection() - confirm.
        self.pinecone_index_name: str = kb["resolvedIndexName"]
        super().__init__(kb, exec_folder, connection_info_retriever, bulk_size=200)
        self.document_filter = MongoDBLikeVectorStoreDocumentFilter(self.metadata_column_type_and_meaning)

    @classmethod
    def _is_property_enabled(cls, value: Any) -> bool:
        """Interpret a connection property value as a boolean flag.

        Missing values and explicit false-like strings ("false", "0", "no", "off", "")
        are disabled; any other string is enabled. Non-string values use plain truthiness.
        """
        if value is None:
            return False
        if isinstance(value, str):
            return value.strip().lower() not in cls._FALSE_PROPERTY_VALUES
        return bool(value)

    def init_connection(self) -> None:
        """Resolve the connection info, build the Pinecone client and optionally create the index.

        Side effects: sets ``pinecone_connection_info`` and ``pinecone_client``, and exports the
        API key as the ``PINECONE_API_KEY`` environment variable.
        """
        connection_info = self.connection_info_retriever(self.connection_name)
        self.pinecone_connection_info = connection_info

        # support using a custom host instead of the public API, a local container for instance
        raw_properties = connection_info.get_params().get('dkuProperties') or []
        dku_properties = {p['name']: p['value'] for p in raw_properties}
        kwargs = {}
        if custom_host := dku_properties.get('pinecone.custom.host'):
            kwargs['host'] = custom_host

        api_key = connection_info["resolvedAPIKey"]
        self.pinecone_client = Pinecone(api_key=api_key, **kwargs)
        os.environ["PINECONE_API_KEY"] = api_key  # required so that langchain pinecone auth succeeds

        # auto create missing indexes, useful when testing locally
        # The flag is parsed rather than tested for truthiness so that a value such as
        # "false" does not enable index creation.
        auto_create = self._is_property_enabled(dku_properties.get('pinecone.custom.auto_create_index'))
        if auto_create and self.pinecone_index_name not in self.pinecone_client.list_indexes().names():
            self.pinecone_client.create_index(
                name=self.pinecone_index_name,
                dimension=self.AUTO_CREATE_INDEX_DIMENSION,
                metric=self.AUTO_CREATE_INDEX_METRIC,
                # we must pass valid info here even though it's ignored
                spec=ServerlessSpec(cloud=self.AUTO_CREATE_INDEX_CLOUD, region=self.AUTO_CREATE_INDEX_REGION)
            )

    def get_db(self, embeddings: DKUEmbeddings, allow_creation: bool = False, **kwargs: Any) -> VectorStore:
        """Return a LangChain vector store bound to the configured Pinecone index.

        :param embeddings: embeddings object; passed as-is when ``langchain-pinecone`` >= 0.1
            is detected, otherwise its ``embed_query`` method is passed instead.
        :param allow_creation: unused; the Pinecone index must already exist.
        :param kwargs: unused.
        :raises AssertionError: if the Pinecone client has not been initialized.
        :raises Exception: if the configured index is not listed by Pinecone.
        """
        assert self.pinecone_client is not None, "Pinecone client not initialized"

        # allow_creation param unused: We always require the Pinecone index to have been created already
        # This check runs before any index handle is built so that a missing index is reported
        # with the actionable message below rather than a client-level lookup error.
        if self.pinecone_index_name not in self.pinecone_client.list_indexes().names():
            raise Exception(f'Pinecone index "{self.pinecone_index_name}" cannot be found. Please create this index in Pinecone.')

        index = self.pinecone_client.Index(self.pinecone_index_name)
        if package_is_at_least_no_import("langchain-pinecone", "0.1"):
            embed = embeddings
        else:
            embed = embeddings.embed_query
        return LangchainPinecone(index, embed, "text")

    def clear_index(self) -> None:
        """Delete all vectors through the index handle, if the default namespace holds any."""
        default_namespace = self.get_default_namespace()
        if default_namespace is not None:  # if default namespace doesn't exist, clear will fail for serverless indexes
            default_namespace.delete(delete_all=True)
        logger.info("Cleared Pinecone index %s", self.pinecone_index_name)

    @override
    def delete_documents(self, documents_uuids: list[str], embeddings: DKUEmbeddings) -> Optional[VectorStore]:
        """Delete the given document ids from the index.

        :param documents_uuids: ids of the vectors to delete; nothing is done when empty.
        :param embeddings: embeddings object used to build the vector store via :meth:`get_db`.
        :return: the vector store used for the deletion, or ``None`` when nothing was deleted
            (no ids given, or the default namespace holds no vectors).
        """
        if not documents_uuids:
            return None

        # if default namespace doesn't exist (deleted from pinecone UI), deletion will fail for serverless indexes
        if self.get_default_namespace() is None:
            logger.warning("Default namespace was deleted or cleared, skipping deletion")
            return None

        vectorstore_db = self.get_db(embeddings=embeddings, allow_creation=False)
        vectorstore_db.delete(ids=documents_uuids)
        return vectorstore_db

    def get_default_namespace(self) -> Optional[Any]:
        """Return the index handle when the default namespace reports at least one vector.

        :return: the Pinecone index handle, or ``None`` when the client is not initialized or
            the index stats report no vectors under ``DEFAULT_NAMESPACE``.
        """
        if self.pinecone_client is None:
            logger.warning("Pinecone client not initialized, skipping")
            return None

        idx = self.pinecone_client.Index(self.pinecone_index_name)
        index_stats = idx.describe_index_stats()
        vector_count = index_stats["namespaces"].get(self.DEFAULT_NAMESPACE, {}).get('vector_count', 0)
        return idx if vector_count > 0 else None
