import csv
import io
import json
import logging
from datetime import datetime, timezone

import dataiku
import pandas as pd
from dataikuapi.dss.dataset import DSSDataset

from .object import get_or_create_flow_zone

# Module-level Dataiku API client, created once at import time. The helpers in
# this module use it to resolve the default project when no explicit `project`
# argument is passed.
client = dataiku.api_client()

def apply_tags(dict_type_name, tags, replace_tags=False, project=None):
    """
    Add or replace tags on Dataiku objects (recipes or datasets) within a project.

    The whole request is validated before any object is modified, so an invalid
    object type or a non-list value never leaves the project partially tagged.

    Args:
        dict_type_name (dict, required):
            Dictionary where keys are object types ("recipe" or "dataset") and values
            are lists of object names. Example: {"recipe": ["recipe1"], "dataset": ["dataset1"]}.
            Empty names inside a list are skipped with a warning.
        tags (str or iterable of str, required):
            Tags to add or replace. A single string is treated as one tag; any other
            iterable (list, tuple, set, ...) is converted to a de-duplicated list.
        replace_tags (bool, optional):
            If True, existing tags are fully replaced by `tags`.
            If False, tags missing from the object are appended to the existing ones;
            when nothing is missing the object's metadata is not rewritten. Default: False.
        project (optional):
            Project object exposing `get_recipe`, `get_dataset` and `project_key`.
            If not provided, the default project of the module-level client is used.

    Returns:
        None:
            This function modifies the metadata of the specified objects directly in Dataiku.

    Raises:
        ValueError:
            If dict_type_name is not a dictionary, if tags is empty, if an invalid
            object type is specified, or if a value of dict_type_name is not a list.
        Exception:
            Any error raised while reading or writing an object's metadata is
            logged and re-raised unchanged.
    """
    # Validate inputs
    if not isinstance(dict_type_name, dict):
        raise ValueError("'dict_type_name' must be a dictionary")

    if not tags:
        raise ValueError("'tags' cannot be empty")

    # Normalize tags to a de-duplicated list (order preserved); this also makes
    # tuples and sets usable in the list concatenation below.
    if isinstance(tags, str):
        tags = [tags]
    tags = list(dict.fromkeys(tags))

    if project is None:
        project = client.get_default_project()

    # Dictionary to map object types to their respective retrieval methods
    get_object = {"recipe": project.get_recipe, "dataset": project.get_dataset}

    # Validate the complete request first so that a bad entry cannot be
    # discovered after some objects have already been modified.
    for obj_type, obj_names in dict_type_name.items():
        if obj_type not in get_object:
            raise ValueError(f"Invalid object type '{obj_type}'. Supported types: {list(get_object.keys())}")

        if not isinstance(obj_names, list):
            raise ValueError(f"Values in 'dict_type_name' must be lists. Got {type(obj_names).__name__} for '{obj_type}'")

    for obj_type, obj_names in dict_type_name.items():
        for name in obj_names:
            if not name:
                logging.warning(f"Skipping empty name in {obj_type} list")
                continue

            try:
                dku_object = get_object[obj_type](name)
                metadata = dku_object.get_metadata()

                # A missing or null "tags" entry is treated as "no tags yet"
                current_tags = metadata.get("tags") or []

                if replace_tags:
                    # Full replacement; copy so objects never share one list
                    metadata["tags"] = list(tags)
                    changed_tags = tags
                    action = "replaced"
                else:
                    # Only the tags that are not on the object yet
                    changed_tags = [tag for tag in tags if tag not in current_tags]
                    if not changed_tags:
                        # Nothing to add: skip the write entirely
                        logging.debug(
                            f"All tags already present on '{name}' ({obj_type}) "
                            f"in project '{project.project_key}'."
                        )
                        continue
                    metadata["tags"] = list(dict.fromkeys(current_tags + changed_tags))
                    action = "added"

                # Apply the updated metadata to the object
                dku_object.set_metadata(metadata)

                logging.info(
                    f"Tags {changed_tags} successfully {action} to '{name}' ({obj_type}) "
                    f"in project '{project.project_key}'."
                )

            except Exception as e:
                logging.error(
                    f"Failed to apply tags to '{name}' ({obj_type}) in project '{project.project_key}': {str(e)}"
                )
                raise

def read_file_from_folder(folder, file_name):
    """
    Return the UTF-8 decoded content of one file stored in a Dataiku folder.

    Args:
        folder: Folder handle exposing ``get_download_stream`` (used as a
            context manager).
        file_name (str): The name of the file to read.

    Returns:
        str: The whole file content decoded as UTF-8.
    """
    with folder.get_download_stream(file_name) as stream:
        raw_bytes = stream.read()

    # Decoding happens once the stream has been released.
    return raw_bytes.decode("utf-8")

def write_text_to_folder(text, folder, file_path):
    """
    Store a string as a UTF-8 encoded file inside a Dataiku folder.

    Args:
        text (str): The content to write.
        folder: Folder handle exposing ``get_writer`` (used as a context manager).
        file_path (str): The path of the target file inside the folder.

    Returns:
        None
    """
    with folder.get_writer(file_path) as sink:
        payload = text.encode("utf-8")
        sink.write(payload)

def write_data_into_folder(data_type: str, data, folder, file_path: str) -> None:
    """
    Writes data into a Dataiku folder as either a CSV or JSON file.

    The content is fully serialized before the folder writer is opened, so an
    unsupported `data_type` or unserializable `data` raises before the target
    file is opened for writing.

    Args:
        data_type (str): The type of file to write ('csv' or 'json', case-insensitive).
        data (dict | list | pd.DataFrame): The data to write.
            - If 'json', any object accepted by `json.dumps` (typically a dictionary).
            - If 'csv', a Pandas DataFrame (written without its index) or a list of rows.
        folder: Folder handle exposing `get_writer` (used as a context manager).
        file_path (str): The path of the file inside the folder.

    Returns:
        None

    Raises:
        ValueError: If `data_type` is neither 'csv' nor 'json', or if 'csv' data
            is neither a DataFrame nor a list.
    """
    data_type = data_type.lower()  # Normalize case

    if data_type == "json":
        serialized = json.dumps(data, indent=4)

    elif data_type == "csv":
        csv_stream = io.StringIO()

        # A DataFrame knows how to serialize itself
        if isinstance(data, pd.DataFrame):
            data.to_csv(csv_stream, index=False, sep=";", quoting=csv.QUOTE_MINIMAL)

        # A list of rows goes through the standard csv writer
        elif isinstance(data, list):
            csv_writer = csv.writer(csv_stream, delimiter=";", quoting=csv.QUOTE_MINIMAL)
            csv_writer.writerows(data)

        else:
            raise ValueError("For 'csv', data must be a Pandas DataFrame or a list of lists.")

        serialized = csv_stream.getvalue()

    else:
        raise ValueError("Unsupported data type. Use 'csv' or 'json'.")

    # Only open the target once there is valid content to store
    with folder.get_writer(file_path) as writer:
        writer.write(serialized.encode("utf-8"))

def read_data_from_folder(data_type: str, folder, file_path: str):
    """
    Reads data from a Dataiku folder as either a CSV or JSON file.

    This is a best-effort reader: every failure (unsupported type, missing
    file, undecodable or unparsable content) is logged with its traceback and
    reported to the caller as None instead of an exception.

    Args:
        data_type (str): The type of file to read ('csv' or 'json', case-insensitive).
        folder: Folder handle exposing `get_download_stream` (used as a context manager).
        file_path (str): The path of the file inside the folder.

    Returns:
        object | pd.DataFrame | None:
            - The parsed JSON document (dict, list, ...) when reading a JSON file.
            - A Pandas DataFrame when reading a CSV file (';' delimited).
            - None if an error occurs.
    """
    try:
        data_type = data_type.lower()  # Normalize case

        # Reject unsupported types before downloading anything
        if data_type not in ("json", "csv"):
            raise ValueError("Unsupported data type. Use 'csv' or 'json'.")

        with folder.get_download_stream(file_path) as stream:
            content = stream.read().decode("utf-8")

        if data_type == "json":
            return json.loads(content)
        return pd.read_csv(io.StringIO(content), delimiter=";")
    except Exception:
        logging.exception(f"Error reading file {file_path} from folder")
        return None

def read_and_concat_json_from_folder(folder, sub_folder: str = "", flatten_structure: bool = True) -> pd.DataFrame:
    """
    Reads all JSON files from a specified sub-folder (recursively) in a Dataiku folder
    and concatenates them into a single Pandas DataFrame.

    Args:
        folder: Folder handle exposing `list_paths_in_partition` and
            `get_download_stream`.
        sub_folder (str, optional): A subfolder path used to restrict the search.
            Leading/trailing slashes are ignored. A file is selected when its path
            contains `sub_folder` as whole directory component(s), so "sub" selects
            "/sub/a.json" and "/x/sub/a.json" but not "/mysub/a.json". An empty
            value selects every JSON file.
        flatten_structure (bool, optional): If True (default), each document is
            flattened with `pd.json_normalize` (a top-level list yields one row per
            element). If False, each file yields a single row with the columns
            'file_path' and 'data' (the parsed document, unflattened).

    Returns:
        pd.DataFrame: A concatenated DataFrame of all JSON files found.

    Raises:
        ValueError: If no JSON file matches, or if a file does not contain valid JSON.
    """
    sub_folder = sub_folder.strip("/")

    # Surround the sub-folder with slashes so that only complete directory
    # names can match; paths get a single leading slash for the same reason.
    needle = f"/{sub_folder}/" if sub_folder else "/"

    json_files = [
        path for path in folder.list_paths_in_partition()
        if path.endswith(".json") and needle in "/" + path.lstrip("/")
    ]

    if not json_files:
        raise ValueError(f"No JSON files found in sub-folder: '{sub_folder}'.")

    frames = []

    for file_path in json_files:
        with folder.get_download_stream(file_path) as stream:
            content = stream.read().decode("utf-8")

        try:
            json_data = json.loads(content)
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON in file {file_path}: {str(e)}") from e

        if flatten_structure:
            # json_normalize expects a list of records
            records = json_data if isinstance(json_data, list) else [json_data]
            frames.append(pd.json_normalize(records))
        else:
            frames.append(pd.DataFrame([{'file_path': file_path, 'data': json_data}]))

    # json_files is non-empty here, so there is at least one frame
    return pd.concat(frames, ignore_index=True)

def check_if_path_exists(folder, path: str) -> bool:
    """
    Checks if a file or folder exists in a Dataiku folder.

    Paths are compared without their leading slash (and `path` without a
    trailing one), so "sub/a.json", "/sub/a.json" and "/sub/" are all accepted
    regardless of how the folder listing spells its entries.

    Args:
        folder: Folder handle exposing `list_paths_in_partition`.
        path (str): The path of the file or folder inside the Dataiku folder.
            A folder "exists" when at least one listed file lives under it. An
            empty path (or "/") designates the folder root and is reported as
            existing only when the listing is not empty.

    Returns:
        bool: True if the file or folder exists, False otherwise. Any error
        raised while listing the folder is logged and reported as False.
    """
    try:
        wanted = path.strip("/")
        known_paths = [listed.lstrip("/") for listed in folder.list_paths_in_partition()]

        if not wanted:
            # Root of the folder: everything listed lives under it
            return bool(known_paths)

        folder_prefix = wanted + "/"
        return any(
            listed == wanted or listed.startswith(folder_prefix)
            for listed in known_paths
        )
    except Exception:
        logging.exception("Error while checking path existence")
        return False

def list_zone_datasets(flow_zone_name, project=None):
    """
    Return the names of the datasets contained in a given Flow zone.

    The zone is obtained through ``get_or_create_flow_zone``.
    NOTE(review): judging by that helper's name, a missing zone is presumably
    created as a side effect — confirm against its implementation.

    Args:
        flow_zone_name (str): The name of the Flow zone.
        project (optional): Project object to look the zone up in. When omitted,
            the default project of the module-level client is used.

    Returns:
        list[str]: Names of the zone items that are ``DSSDataset`` instances.
    """
    if project is None:
        project = client.get_default_project()

    target_zone = get_or_create_flow_zone(flow_zone_name=flow_zone_name, project=project)

    # Keep only dataset items; other item types of the zone are ignored.
    return [item.name for item in target_zone.items if isinstance(item, DSSDataset)]

def get_dataset_columns(dataset_name, project=None):
    """
    Get the column names of a dataset.

    Args:
        dataset_name (str): Name of the dataset whose schema is read.
        project (optional): Project object exposing `project_key`. When given,
            the dataset is looked up in that project; when omitted,
            `dataiku.Dataset` resolves the project on its own.

    Returns:
        list[str]: The value of the "name" entry of every column returned by
        `read_schema()`, in schema order. An empty list is returned (and the
        error logged) if the dataset or its schema cannot be read.
    """
    try:
        if project is None:
            dataset = dataiku.Dataset(dataset_name)
        else:
            # Honor the caller's project instead of silently using the default one
            dataset = dataiku.Dataset(dataset_name, project_key=project.project_key)

        # assumes read_schema() yields dict-like column descriptors with a "name" key
        return [column["name"] for column in dataset.read_schema()]
    except Exception:
        logging.exception("Error while getting dataset columns")
        return []



def extract_relevant_insights_from_dku_scenario_run(run):
    """
    Summarize a scenario run object as a flat dictionary.

    Args:
        run: Run object exposing ``id``, ``running``, ``outcome``, ``start_time``,
            ``end_time``, ``duration`` and ``trigger`` (a dict-like with a
            "type" entry). ``outcome`` and ``end_time`` are only read when the
            run is not running.

    Returns:
        dict: With the keys
            - "run_id": ``run.id``
            - "status": "running", or the lower-cased outcome of a finished run
            - "start_dt_utc": ISO-formatted ``run.start_time``
            - "end_dt_utc": ISO-formatted ``run.end_time``, or "" while running
            - "duration": ``run.duration``
            - "trigger_type": "manual" when the trigger type is "manual",
              otherwise "automatic"
        NOTE(review): the "_utc" suffix presumes the run's datetimes are UTC —
        no conversion is done here; confirm against the run object.
    """
    summary = {"run_id": run.id}

    if run.running:
        summary["status"] = "running"
    else:
        summary["status"] = run.outcome.lower()

    summary["start_dt_utc"] = run.start_time.isoformat()

    if run.running:
        summary["end_dt_utc"] = ""
    else:
        summary["end_dt_utc"] = run.end_time.isoformat()

    summary["duration"] = run.duration

    launched_manually = run.trigger.get("type") == "manual"
    summary["trigger_type"] = "manual" if launched_manually else "automatic"

    return summary

def extract_recipe_payload(recipe_name, project=None):
    """
    Retrieve the payload of a recipe inside a Dataiku DSS project.

    This is a best-effort lookup: any failure is logged as a warning (with its
    traceback) and reported as an empty string, never raised.

    Args:
        recipe_name (str): Identifier of the recipe to extract the payload from.
        project (dataikuapi.dss.project.DSSProject, optional):
            DSS project object. If None, the default project is used.

    Returns:
        The value returned by the recipe settings' `get_payload()`, or an empty
        string ('') if the recipe cannot be accessed or retrieved.
        NOTE(review): the exact payload type (raw string vs. parsed structure)
        is decided by the Dataiku API — confirm before relying on it.
    """
    try:
        if project is None:
            project = client.get_default_project()

        return project.get_recipe(recipe_name).get_settings().get_payload()

    except Exception:
        # Keep the '' fallback callers rely on, but leave a trace of the cause
        logging.warning(f"Could not retrieve payload of recipe '{recipe_name}'", exc_info=True)
        return ""

def get_zone_flow(zone, add_payload=True):
    """
    Retrieve all flow items inside a Dataiku flow zone (FlowZone object)
    in traversal order, and optionally enrich recipe items with their payload.

    The zone graph is fetched once and reused for every recipe lookup.

    Args:
        zone (dataikuapi.dss.flow.FlowZone):
            The FlowZone object from which to retrieve items.
        add_payload (bool):
            If True, add a 'payload' field to each item whose "type" is
            "RUNNABLE_RECIPE", obtained from `extract_recipe_payload` for the
            item's "ref" in the graph's project.

    Returns:
        list[dict]:
            The items returned by the graph's `get_items_in_traversal_order()`,
            enriched in place with payloads when requested.
    """
    graph = zone.get_graph()
    zone_flow_items = graph.get_items_in_traversal_order()

    if add_payload:
        for item in zone_flow_items:
            if item.get("type") == "RUNNABLE_RECIPE":
                # Reuse the graph fetched above rather than requesting it again per recipe
                item["payload"] = extract_recipe_payload(item.get("ref"), project=graph.flow.project)

    return zone_flow_items

def set_project_variables(new_variables, variable_type='standard', replace=False, replace_existing_keys=True, project = None):
    """
    Sets project variables in Dataiku DSS.

    Errors are never propagated: any failure (including an invalid
    `variable_type`) is logged with its traceback and the function returns
    without modifying the project.

    Args:
        new_variables (dict): Dictionary of new variables to set.
        variable_type (str): Type of variables ('standard' or 'local').
        replace (bool): If True, completely replaces all existing variables of
            that type with the new ones (an empty/None `new_variables` clears them).
        replace_existing_keys (bool): Only used when `replace` is False. If True,
            existing keys are overwritten with new values; if False, only keys
            that do not exist yet are added.
        project (dataikuapi.dss.project.DSSProject, optional): The project object.
            If None, the default project is used.

    Returns:
        None
    """
    # Known before the project is resolved so the error log below can always be built
    project_key = "<unresolved>"
    try:
        if project is None:
            project = client.get_default_project()
        project_key = project.project_key

        # Ensure valid variable type before talking to the API
        if variable_type not in ("standard", "local"):
            raise ValueError(f"Invalid variable_type '{variable_type}'. Must be 'standard' or 'local'.")

        # Stop early if replace=False and new_variables is empty
        if not replace and not new_variables:
            logging.warning(f"Skipping update of {variable_type} variables on project '{project_key}' "
                            f"since new_variables is empty and replace=False.")
            return

        # Retrieve the current project variables
        variables = project.get_variables()

        if replace:
            # Replace all variables with new ones
            variables[variable_type] = new_variables or {}
            logging.info(f"Replacing all {variable_type} variables on project '{project_key}' with: {new_variables}")
        else:
            # A missing or null scope is treated as empty
            current_variables = variables.get(variable_type) or {}

            if replace_existing_keys:
                # New values win over existing ones
                merged_variables = {**current_variables, **new_variables}
            else:
                # Existing values win; only unknown keys are added
                merged_variables = {**new_variables, **current_variables}

            variables[variable_type] = merged_variables
            logging.info(f"Updated {variable_type} variables on project '{project_key}': {new_variables}")

        # Apply updated variables
        project.set_variables(variables)

    except Exception:
        logging.exception(f"Error setting {variable_type} variables on project '{project_key}'")

def get_project_variables(variable_type=None, project=None):
    """
    Read the variables of a Dataiku DSS project.

    Args:
        variable_type (str | None):
            - "standard": only the standard variables are returned.
            - "local": only the local variables are returned.
            - None: the complete structure returned by the project is handed back.
        project (dataikuapi.dss.project.DSSProject, optional):
            Project to read from. Defaults to the client's default project.

    Returns:
        dict: The requested scope (an empty dict when that scope is missing or
        empty), or the full variable structure when variable_type is None.

    Raises:
        ValueError: If variable_type is not "standard", "local", or None.
    """
    target_project = client.get_default_project() if project is None else project
    all_variables = target_project.get_variables()

    # No scope requested: hand back everything (standard + local).
    if variable_type is None:
        return all_variables

    if variable_type in ("standard", "local"):
        scoped = all_variables.get(variable_type, {})
        return scoped if scoped else {}

    raise ValueError(
        f"Invalid variable_type '{variable_type}'. Expected 'standard', 'local', or None."
    )

def convert_dku_ts_to_dt(timestamp_ms):
    """
    Converts a Dataiku timestamp (in milliseconds) into a formatted datetime (UTC).

    Args:
        timestamp_ms (int or str): Timestamp in milliseconds since the Unix epoch.

    Returns:
        str: The UTC date and time in ISO 8601 format, without a UTC offset
        suffix (e.g. "2023-11-14T22:13:20").
    """
    timestamp_s = int(timestamp_ms) / 1000  # Convert to seconds

    # datetime.utcfromtimestamp() is deprecated since Python 3.12: convert with an
    # explicit UTC timezone, then drop the tzinfo so the output keeps its
    # historical offset-less form.
    date_time = datetime.fromtimestamp(timestamp_s, tz=timezone.utc).replace(tzinfo=None)
    return date_time.isoformat()