import dataiku
import dataikuapi
from dku_utils.projects.connections.connection_change_filesystem import change_filesystem_dataset_format, switch_managed_dataset_connection_to_local_filesytem_storage, switch_managed_dataset_connection_to_cloud_storage
from dku_utils.projects.connections.connection_change_sql import switch_managed_dataset_connection_to_sql
from dku_utils.projects.datasets.dataset_commons import get_dataset_in_connection_settings
from dku_utils.projects.recipes.recipe_commons import get_recipe_available_engines, switch_recipe_engine, set_spark_configuration_on_recipe

def get_connection_type(project, dataset_name, connection_name):
    """Return the dataset type obtained by creating a managed dataset on a connection.

    A throwaway managed dataset named ``_tmp_dataset`` is created on
    ``connection_name``, the ``"type"`` entry of its settings is read, and the
    dataset is deleted again.

    Args:
        project: Project handle; must expose ``get_dataset`` and be accepted by
            ``dataikuapi.CodeRecipeCreator``.
        dataset_name: Unused by this function; kept so existing callers keep
            working.
        connection_name: Name of the connection on which the temporary dataset
            is created.

    Returns:
        The value stored under ``"type"`` in the temporary dataset's settings.
    """
    tmp_dataset_name = "_tmp_dataset"
    tmp_recipe_name = "_tmp_recipe"

    # Only the output dataset is needed: the recipe creator is never asked to
    # create the recipe itself. Per the dataikuapi documentation,
    # with_new_output_dataset creates the managed dataset immediately.
    builder = dataikuapi.CodeRecipeCreator(tmp_recipe_name, "python", project)
    builder.with_new_output_dataset(tmp_dataset_name, connection_name)

    tmp_dataset = project.get_dataset(tmp_dataset_name)
    try:
        return tmp_dataset.get_settings().settings["type"]
    finally:
        # Always remove the temporary dataset, even when reading its settings
        # fails, so that no stray "_tmp_dataset" is left in the project.
        tmp_dataset.delete()

# Resolve the current project through the API client and load its variables.
client = dataiku.api_client()
project_key = dataiku.get_custom_variables()["projectKey"]
project = client.get_project(project_key)
variables = project.get_variables()

# Update input dataset connection info
# The input dataset is pointed at the connection named by the local project
# variable "logs_connection", and its type is set to whatever
# get_connection_type reports for that connection.
input_dataset_name = "compute_resource_usage_logs"
logs_connection = variables["local"]["logs_connection"]

input_dataset_settings = project.get_dataset(input_dataset_name).get_settings()
input_dataset_settings.settings["params"]["connection"] = logs_connection
input_dataset_type = get_connection_type(project, input_dataset_name, logs_connection)
input_dataset_settings.settings["type"] = input_dataset_type
input_dataset_settings.save()

# Update first prepare recipe "compute_logs_normalized" to toggle the right Cleanup step group depending on the format of the logs (log4j or Event server)
variables = project.get_variables()
logs_origin = variables["local"]["logs_origin"]  # log4j or event_server

normalize_recipe_settings = project.get_recipe("compute_logs_normalized").get_settings()
normalize_payload = normalize_recipe_settings.get_json_payload()

# Exactly one of the first two steps stays enabled: step 0 for "event_server"
# logs, step 1 for "log4j" logs. Any other origin aborts before saving.
from_event_server = logs_origin == "event_server"
if not from_event_server and logs_origin != "log4j":
    raise Exception(f"Unexpected logs origin '{logs_origin}'. Expected log4j or event_server.")

normalize_payload["steps"][0]["disabled"] = not from_event_server
normalize_payload["steps"][1]["disabled"] = from_event_server

normalize_recipe_settings.save()

print(f"compute_logs_normalized recipe updated to process '{logs_origin}' logs.")

# Update managed dataset connection
# "managed_ds_connection" is optional: when it is missing or empty, the managed
# datasets are left untouched.
managed_dataset_connection = variables["local"].get("managed_ds_connection", "")
if managed_dataset_connection:
    managed_dataset_connection_type = get_dataset_in_connection_settings(project, managed_dataset_connection)["type"]

    # Datasets that are moved to the target connection, in processing order.
    MANAGED_DS = [
        "logs_capacity_planning_stacked",
        "logs_k8s",
        "logs_k8s_deduplicated",
        "logs_k8s_prepared",
        "logs_k8s_prepared_windows",
        "logs_k8s_related_jobs",
        "logs_local_process",
        "logs_local_process_CPU_ranked",
        "logs_local_process_by_hours",
        "logs_local_process_by_minutes",
        "logs_local_process_dedupicated",
        "logs_local_process_frequencies_stats_prepared",
        "logs_local_process_last",
        "logs_local_process_memory_rank",
        "logs_local_process_over_minutes",
        "logs_local_process_prepared",
        "logs_local_process_prepared_distinct",
        "logs_local_process_prepared_frequencies_stats",
        "logs_local_process_prepared_windows",
        "logs_normalized",
        "logs_others",
        "logs_sql_connection",
        "logs_sql_connection_complete",
        "logs_sql_connection_prepared",
        "total_logs_by_day",
        "total_logs_prepared",
    ]

    # The connection type is the same for every dataset, so classify it once.
    sql_connection_types = ("PostgreSQL", "Snowflake", "SQLServer", "Databricks")
    targets_filesystem = managed_dataset_connection_type == "Filesystem"
    targets_sql = managed_dataset_connection_type in sql_connection_types

    for managed_name in MANAGED_DS:
        if targets_filesystem:
            # Local filesystem storage: switch the connection, then store as CSV.
            switch_managed_dataset_connection_to_local_filesytem_storage(project, managed_name, managed_dataset_connection)
            change_filesystem_dataset_format(project, managed_name, "csv", change_dataset_format_type=True)
            continue

        if targets_sql:
            switch_managed_dataset_connection_to_sql(project, managed_name, managed_dataset_connection, True)
            continue

        # Every other connection type is handled as cloud storage, and the
        # dataset format is then set to Parquet.
        switch_managed_dataset_connection_to_cloud_storage(project, managed_name, managed_dataset_connection)
        cloud_dataset_settings = project.get_dataset(managed_name).get_settings()
        cloud_dataset_settings.settings["formatType"] = "parquet"
        cloud_dataset_settings.settings["formatParams"] = {
            "parquetLowerCaseIdentifiers": False,
            "representsNullFields": False,
            "parquetCompressionMethod": "SNAPPY",
            "parquetFlavor": "HIVE",
            "parquetBlockSizeMB": 128,
        }
        cloud_dataset_settings.save()
