import json
import logging
import os

# Module-level logger, forced to INFO so the "Skipping ..." messages below are emitted
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Sample sentences encoded by a model to check that it can be used for text embedding
SENTENCE_EMBEDDING_EXAMPLES = [
    "This is an example sentence",
    "Each sentence is converted",
]


def _get_models_meta_path():
    """
    Return the path of the models metadata file for the current code env resources

    The path is read from the DKU_CODE_ENV_RESOURCES_MODELS_META_PATH environment variable.

    :return: the non-empty path found in the environment
    :raises EnvironmentError: if the variable is unset or empty
    """
    meta_path = os.environ.get("DKU_CODE_ENV_RESOURCES_MODELS_META_PATH", "")
    if meta_path:
        return meta_path

    # An unset or empty variable means we are not running inside a resources init script
    raise EnvironmentError("This method must be called from a code env resources initialization script")


def _check_sentence_embedding_compat(model_path):
    """
    Check if a downloaded sentence transformers model is compatible with the text embedding feature

    The model is loaded from ``model_path`` and asked to encode SENTENCE_EMBEDDING_EXAMPLES;
    any error raised while doing so marks the model as incompatible.

    :param model_path: path to load the model from
    :return: True if the model loaded and encoded the sample sentences, False otherwise
    """
    # Imported here rather than at module level, so importing this module does not
    # itself require sentence_transformers
    from sentence_transformers import SentenceTransformer
    # Simulate running data through the model to check compatibility
    try:
        model = SentenceTransformer(model_path)
        model.encode(SENTENCE_EMBEDDING_EXAMPLES)
    except Exception:
        # Deliberately broad: whatever goes wrong, the model is reported as not compatible.
        # Catching Exception (not a bare except) lets KeyboardInterrupt / SystemExit propagate,
        # and the traceback is logged so the reason for the incompatibility is not lost.
        logger.warning("Sentence embedding compatibility check failed for model at %s",
                       model_path, exc_info=True)
        return False
    return True

def _update_sentence_transformers_meta(models_meta, cache_folder):
    """
    Add metadata of downloaded sentence transformers models to global models metadata

    Every entry of ``cache_folder`` that contains a ``config.json`` or a ``modules.json`` is
    recorded under ``models_meta["sentence_transformers"]``, keyed by model name. Entries with
    neither file are ignored. A ``cache_folder`` that is not an existing directory is treated as
    "no models downloaded" and leaves ``models_meta`` untouched.

    :param models_meta: dict holding metadata of all current models (updated in place)
    :param cache_folder: folder with sentence transformers models
    """
    if not os.path.isdir(cache_folder):
        # Nothing has been downloaded there: there is no metadata to add
        return

    sentence_transformers_meta = models_meta.setdefault("sentence_transformers", {})

    # os.listdir order is arbitrary; sort so the generated metadata is deterministic
    for model_dir in sorted(os.listdir(cache_folder)):
        # OrgName/Model_Name => "OrgName_Model_Name" dir, default-model => "default-model" dir
        # Organization and default model names cannot contain "_"
        model_name = model_dir.replace("_", "/", 1)
        model_path = os.path.join(cache_folder, model_dir)
        config_path = os.path.join(model_path, "config.json")
        modules_path = os.path.join(model_path, "modules.json")

        if os.path.exists(config_path):
            # Read the config and close the file before the (slow) compatibility check runs
            with open(config_path, "r") as f:
                config = json.load(f)
        elif os.path.exists(modules_path):
            # Only modules.json is present: type and max position embeddings are recorded as None
            config = {}
        else:
            # Not a model directory
            continue

        sentence_transformers_meta[model_name] = {
            "name": model_name,
            "compat": _check_sentence_embedding_compat(model_path),
            "type": config.get("model_type"),
            "maxPositionEmbeddings": config.get("max_position_embeddings")
        }

def update_models_meta():
    """
    Collect metadata of the downloaded models and write it to the models metadata file

    Only logs and returns when DKU_CONTAINER_EXEC is set or when DKU_NODE_TYPE is "api".
    Sentence transformers models are looked up in the folder given by SENTENCE_TRANSFORMERS_HOME,
    when that variable is set.
    """
    skip_message = None
    if os.environ.get("DKU_CONTAINER_EXEC"):
        skip_message = "Skipping update_models_meta (containerized execution)"
    elif os.environ.get("DKU_NODE_TYPE") == "api":
        skip_message = "Skipping update_models_meta (API Node)"

    if skip_message is not None:
        logger.info(skip_message)
        return

    # Resolve the output path first: it raises before any model is inspected if unavailable
    meta_file = _get_models_meta_path()

    collected = {"sentence_transformers": {}}
    st_home = os.getenv("SENTENCE_TRANSFORMERS_HOME")
    if st_home:
        _update_sentence_transformers_meta(collected, st_home)

    with open(meta_file, 'w') as out:
        json.dump(collected, out)

def clear_models_meta():
    """
    Reset the models metadata file to an empty JSON object

    Only logs and returns when DKU_CONTAINER_EXEC is set or when DKU_NODE_TYPE is "api".
    A metadata file that does not exist yet is left absent.
    """
    skip_message = None
    if os.environ.get("DKU_CONTAINER_EXEC"):
        skip_message = "Skipping clear_models_meta (containerized execution)"
    elif os.environ.get("DKU_NODE_TYPE") == "api":
        skip_message = "Skipping clear_models_meta (API Node)"

    if skip_message is not None:
        logger.info(skip_message)
        return

    meta_file = _get_models_meta_path()

    # Only overwrite an existing file; never create one
    if os.path.exists(meta_file):
        with open(meta_file, 'w') as out:
            json.dump({}, out)
