import uuid
import logging
import pandas as pd
from datetime import datetime

import dataiku
from dataikuapi.dss.dataset import DSSDataset

from ..dku_utils.utils import write_data_into_folder, read_and_concat_json_from_folder, read_file_from_folder
from ..dku_utils.object import get_or_create_managed_folder, get_or_create_flow_zone, find_webapp
from ..adm.common import get_analysis_session_history_folder, get_analysis_report_folder, get_analysis_zone_default_metadata
from ..dku_utils.utils import extract_relevant_insights_from_dku_scenario_run, set_project_variables, convert_dku_ts_to_dt

# Module-level Dataiku handles, created once when this module is imported.
# Several functions below use `project` as the default value of their
# `project` parameter, so these two lines must run before those definitions.
client = dataiku.api_client()
project = client.get_default_project()

# Default scenario id used by the trigger / abort / status helpers below.
MAIN_ANALYSIS_SESSION_SCENARIO = "MAIN_ANALYSIS_SESSION_SCENARIO"

def generate_analysis_session_id():
    """Return a fresh session id: ``analysis_session_`` followed by 8 hex characters.

    The suffix is the first 8 characters of a random UUID4, so ids are short
    and random but not guaranteed to be unique.
    """
    short_token = uuid.uuid4().hex[:8]
    return "analysis_session_" + short_token

def update_analysis_session_metadata(analysis_session_id: str, metadata: dict):
    """Write ``metadata`` as JSON to ``<analysis_session_id>/metadata.json``.

    The file is written into the folder returned by
    ``get_analysis_session_history_folder()``.

    Args:
        analysis_session_id: Session identifier, used as the sub-directory name.
        metadata: Data handed to ``write_data_into_folder`` with ``data_type="json"``.

    Returns:
        None
    """
    write_data_into_folder(
        data_type="json",
        data=metadata,
        folder=get_analysis_session_history_folder(),
        file_path=f"{analysis_session_id}/metadata.json",
    )

def delete_analysis_session_metadata(analysis_session_id: str):
    """Delete ``<analysis_session_id>/metadata.json`` from the session history folder.

    NOTE(review): a second function with this exact name is defined further
    down in this module. That later definition replaces this one when the
    module is imported, so this version is effectively unreachable.

    Args:
        analysis_session_id: Session identifier, used as the sub-directory name.

    Returns:
        None
    """
    history_folder = get_analysis_session_history_folder()
    history_folder.delete_path(f"{analysis_session_id}/metadata.json")

def get_all_analysis_session_metadata():
    """Load the metadata of all analysis sessions from the session history folder.

    Returns:
        The value produced by ``read_and_concat_json_from_folder`` for the
        session history folder, or an empty ``pd.DataFrame`` if looking up
        the folder or reading it raised.
    """
    try:
        session_folder = get_analysis_session_history_folder()
        return read_and_concat_json_from_folder(session_folder)
    except Exception:
        # Best-effort read: callers still get an empty frame, but the failure
        # is logged instead of being silently discarded.
        logging.exception("Error reading analysis session metadata")
        return pd.DataFrame()

def get_analysis_session_metadata(analysis_session_id: str):
    """Return the metadata stored for one analysis session.

    Reads ``<analysis_session_id>/metadata.json`` from the session history
    folder. When the reader hands back a string it is parsed as JSON; any
    other value is returned unchanged.

    Args:
        analysis_session_id: Session identifier, used as the sub-directory name.

    Returns:
        The metadata, or ``None`` if reading or parsing raised (the error is
        logged with its traceback).
    """
    import json

    history_folder = get_analysis_session_history_folder()
    metadata_file = f"{analysis_session_id}/metadata.json"
    try:
        raw = read_file_from_folder(folder=history_folder, file_name=metadata_file)
        return json.loads(raw) if isinstance(raw, str) else raw
    except Exception:
        logging.exception(f"Error reading metadata for {analysis_session_id}")
        return None

def get_analysis_report(analysis_session_id: str):
    """Return the content of ``<analysis_session_id>.html`` from the analysis report folder.

    Args:
        analysis_session_id: Session identifier; the report file is named after it.

    Returns:
        Whatever ``read_file_from_folder`` returns for that file. Errors
        raised by the helpers are not caught here.
    """
    return read_file_from_folder(
        folder=get_analysis_report_folder(),
        file_name=f"{analysis_session_id}.html",
    )

def delete_analysis_session_flow_zone(analysis_session_id: str):
    """Delete every item of a session's Flow Zone, then the zone itself.

    Each item is deleted exactly once: datasets with ``drop_data=True`` so
    their data is removed too, every other item type with a plain
    ``delete()``. Previously both calls were attempted on every item, so one
    of the two always failed and logged a spurious error.

    All deletions are best-effort: a failure is logged and the remaining
    items (and the zone) are still processed.

    NOTE(review): the zone is obtained through ``get_or_create_flow_zone``;
    judging by that helper's name, a missing zone may be created only to be
    deleted again - TODO confirm.

    Args:
        analysis_session_id: Session identifier, used as the Flow Zone name.

    Returns:
        None
    """
    flow_zone = get_or_create_flow_zone(flow_zone_name = analysis_session_id)
    if not flow_zone:
        return None

    for item in flow_zone.items:
        try:
            if isinstance(item, DSSDataset):
                # Datasets: remove the underlying data along with the dataset.
                item.delete(drop_data=True)
            else:
                # Recipes and other zone items take no drop_data argument.
                item.delete()
        except Exception:
            logging.exception(f"Error deleting item in flow zone {analysis_session_id}")

    # Remove the zone itself once its items have been handled.
    try:
        flow_zone.delete()
    except Exception:
        logging.exception(f"Error deleting flow zone {analysis_session_id}")

    return None

def delete_analysis_session_report(analysis_session_id: str):
    """Remove ``<analysis_session_id>.html`` from the analysis report folder.

    A failure of the delete call is logged with its traceback and otherwise
    ignored; the folder lookup itself is not guarded.

    Args:
        analysis_session_id: Session identifier; the report file is named after it.

    Returns:
        None
    """
    folder = get_analysis_report_folder()
    try:
        folder.delete_path(f"{analysis_session_id}.html")
    except Exception:
        logging.exception(f"Error deleting report for {analysis_session_id}")

def delete_analysis_session_metadata(analysis_session_id: str):
    """Delete the ``<analysis_session_id>`` path from the session history folder.

    This is the definition in effect at runtime: it replaces an earlier
    function of the same name defined higher up in this module.

    Deletion is best-effort: a failure is logged with its traceback and the
    function still returns normally.

    Args:
        analysis_session_id: Session identifier. An empty or ``None`` value is
            a no-op.

    Returns:
        None
    """
    # Guard first: with an empty id the path passed to delete_path would be
    # empty - presumably the folder root (TODO confirm) - so nothing is done
    # and the folder is not even looked up.
    if not analysis_session_id:
        return None

    session_folder = get_analysis_session_history_folder()
    try:
        session_folder.delete_path(f"{analysis_session_id}")
    except Exception:
        logging.exception(f"Error deleting metadata for {analysis_session_id}")
    return None

def delete_analysis_session(analysis_session_id: str, delete_flow_zone = True, delete_report = True, delete_metadata = True):
    """Delete the artefacts of one analysis session.

    The three steps run in a fixed order (Flow Zone, report, metadata) and
    each one can be switched off with its flag.

    Args:
        analysis_session_id: Session identifier passed to each delete helper.
        delete_flow_zone: Call ``delete_analysis_session_flow_zone`` when truthy.
        delete_report: Call ``delete_analysis_session_report`` when truthy.
        delete_metadata: Call ``delete_analysis_session_metadata`` when truthy.

    Returns:
        None
    """
    session = analysis_session_id

    if delete_flow_zone:
        delete_analysis_session_flow_zone(session)

    if delete_report:
        delete_analysis_session_report(session)

    if delete_metadata:
        delete_analysis_session_metadata(session)

def get_available_analytic_tools(only_active = True, dataset_name = "analytic_tools"):
    """Read the analytic tools dataset into a DataFrame.

    Args:
        only_active: When truthy, keep only rows whose ``flag_active`` value
            is truthy after ``astype(bool)``.
        dataset_name: Name of the Dataiku dataset to read.

    Returns:
        pd.DataFrame: The rows without the ``flag_active`` column. An empty
        dataset is returned as read (``flag_active`` not dropped). If the
        dataset cannot be read or filtered, the error is logged and an empty
        DataFrame is returned.
    """
    try:
        dataset = dataiku.Dataset(dataset_name)
        analytic_tool_df = dataset.get_dataframe(infer_with_pandas = False)

        # Nothing to filter: hand the empty frame back unchanged.
        if analytic_tool_df.empty:
            return analytic_tool_df

        if only_active:
            # NOTE(review): astype(bool) maps any non-empty string to True;
            # assumes flag_active is stored as a boolean/numeric - TODO confirm.
            analytic_tool_df = analytic_tool_df.loc[analytic_tool_df['flag_active'].astype(bool)]

        # Non-in-place drop: avoids mutating a filtered slice and matches
        # get_available_business_rules.
        return analytic_tool_df.drop('flag_active', axis=1, errors='ignore')
    except Exception:
        # Best-effort: the dataset may not exist or may be unreadable. Log the
        # reason so an empty result can be told apart from "no tools".
        logging.exception(f"Error reading analytic tools from dataset {dataset_name}")
        return pd.DataFrame()


def get_available_business_rules(only_active = True, dataset_name = "business_rules"):
    """Read the business rules dataset into a DataFrame.

    Args:
        only_active: When truthy, keep only rows whose ``flag_active`` value
            is truthy after ``astype(bool)``.
        dataset_name: Name of the Dataiku dataset to read.

    Returns:
        pd.DataFrame: The rows without the ``flag_active`` column. An empty
        dataset is returned as read (``flag_active`` not dropped). If the
        dataset cannot be read or filtered, the error is logged and an empty
        DataFrame is returned.
    """
    try:
        dataset = dataiku.Dataset(dataset_name)
        df = dataset.get_dataframe(infer_with_pandas = False)

        # Nothing to filter: hand the empty frame back unchanged.
        if df.empty:
            return df

        if only_active:
            # NOTE(review): astype(bool) maps any non-empty string to True;
            # assumes flag_active is stored as a boolean/numeric - TODO confirm.
            df = df.loc[df['flag_active'].astype(bool)]

        return df.drop("flag_active", axis=1, errors="ignore")
    except Exception:
        # Best-effort: the dataset may not exist or may be unreadable. Log the
        # reason so an empty result can be told apart from "no rules".
        # (pandas is already imported at module level; no local import needed.)
        logging.exception(f"Error reading business rules from dataset {dataset_name}")
        return pd.DataFrame()


## Main Analysis Session Scenario
def trigger_main_analysis_session_scenario(
    scenario_id = MAIN_ANALYSIS_SESSION_SCENARIO,
    project = project
    ):
    """Run the scenario and wait for it to finish (blocking).

    The scenario is started with ``run_and_wait()``, so this call only
    returns once that method returns; errors it raises are not caught here.

    Args:
        scenario_id: Id of the scenario to run.
        project: Project holding the scenario. Defaults to the module-level
            default project; ``None`` falls back to the client's default
            project.

    Returns:
        The value of ``extract_relevant_insights_from_dku_scenario_run`` for
        the completed run.
    """
    target_project = client.get_default_project() if project is None else project

    completed_run = target_project.get_scenario(scenario_id).run_and_wait()
    return extract_relevant_insights_from_dku_scenario_run(completed_run)

def abort_main_analysis_session_scenario(
    scenario_id = MAIN_ANALYSIS_SESSION_SCENARIO,
    project = project
    ):
    """Request an abort of the scenario by calling ``abort()`` on it.

    Args:
        scenario_id: Id of the scenario to abort.
        project: Project holding the scenario. Defaults to the module-level
            default project; ``None`` falls back to the client's default
            project.

    Returns:
        None
    """
    target_project = client.get_default_project() if project is None else project

    target_project.get_scenario(scenario_id).abort()
    return None

def get_main_analysis_session_scenario_run_status(
    scenario_id = MAIN_ANALYSIS_SESSION_SCENARIO,
    project = project):
    """Summarise the last runs of the scenario as a DataFrame.

    Each run returned by ``get_last_runs()`` is converted with
    ``extract_relevant_insights_from_dku_scenario_run``; the resulting rows
    are sorted by ``start_dt_utc``, most recent first.

    Args:
        scenario_id: Id of the scenario to inspect.
        project: Project holding the scenario. Defaults to the module-level
            default project; ``None`` falls back to the client's default
            project.

    Returns:
        pd.DataFrame: One row per run, or an empty DataFrame when there are
        no runs.
    """
    if project is None:
        project = client.get_default_project()

    last_runs = project.get_scenario(scenario_id).get_last_runs()
    insight_rows = [
        extract_relevant_insights_from_dku_scenario_run(one_run)
        for one_run in last_runs
    ]

    if not insight_rows:
        return pd.DataFrame()

    return pd.DataFrame(insight_rows).sort_values("start_dt_utc", ascending=False)

def is_main_analysis_session_scenario_running(
    scenario_id = MAIN_ANALYSIS_SESSION_SCENARIO,
    project = project
):
    """Tell whether the most recent run of the scenario is still in progress.

    Args:
        scenario_id: Id of the scenario to inspect.
        project: Project holding the scenario. Defaults to the module-level
            default project; ``None`` falls back to the client's default
            project.

    Returns:
        The ``running`` attribute of the latest run. ``False`` when there is
        no run, when the run has no such attribute, or when the lookup raised
        (the error is logged).
    """
    if project is None:
        project = client.get_default_project()

    try:
        # Only the newest run matters for the "is it running" question.
        newest = project.get_scenario(scenario_id).get_last_runs(limit=1)
        if not newest:
            return False
        return getattr(newest[0], 'running', False)
    except Exception:
        logging.exception("Error checking scenario running status")
        return False



def update_analysis_session_params_in_flow(analysis_session_id):
    """Store the session id in the ``SELECTED_ANALYSIS_SESSION_ID`` project variable.

    The value is written through ``set_project_variables`` with
    ``variable_type='local'``.

    Args:
        analysis_session_id: Session identifier to store.
    """
    selected_session = {"SELECTED_ANALYSIS_SESSION_ID": analysis_session_id}
    set_project_variables(new_variables=selected_session, variable_type='local')


def get_report_assistant_webapp_relative_path(project=None):
    """
    Build the relative URL of the ``ai_report_assistant`` webapp.

    Args:
        project (dataikuapi.dss.project.DSSProject, optional):
            Project whose key is used in the URL. If None, the client's
            default project is used.

    Returns:
        str: ``projects/<PROJECT_KEY>/webapps/<WEBAPP_ID>_aireportassistant/view``,
        or an empty string when ``find_webapp`` returns None.
    """
    resolved_project = client.get_default_project() if project is None else project
    project_key = resolved_project.project_key

    webapp = find_webapp(webapp_name="ai_report_assistant")
    if webapp is not None:
        # The URL id is the webapp id followed by a fixed suffix.
        webapp_url_id = f"{webapp.webapp_id}_aireportassistant"
        return f"projects/{project_key}/webapps/{webapp_url_id}/view"

    # Webapp not found: callers receive an empty path.
    return ""

def list_analysis_zones(project=None):
    """
    List the Flow Zones that carry the default analysis tags.

    A zone qualifies when its tags include every default analysis tag
    (order is irrelevant). If no default tags are configured, every zone
    qualifies. The creation date of a zone is read from the
    ``creationTag.lastModifiedOn`` setting of the first dataset found in the
    zone; it is None when the zone has no dataset or the lookup fails.

    Args:
        project: Project to inspect. If None, the default project of a new
            API client is used.

    Returns:
        pd.DataFrame with columns:
            - zone_name
            - creation_date
        sorted by creation_date, newest first, missing dates last
        (empty dataframe with those columns if none found)

    Raises:
        ValueError: if anything fails while listing (the original error is
        logged with its traceback).
    """
    try:
        if project is None:
            project = dataiku.api_client().get_default_project()

        flow = project.get_flow()
        listed_zones = flow.list_zones()

        # No zones at all: empty result with the expected columns.
        if not listed_zones:
            return pd.DataFrame(columns=["zone_name", "creation_date"])

        required_tags = set(get_analysis_zone_default_metadata().get("tags") or [])

        names = []
        creation_dates = []

        for listed_zone in listed_zones:
            zone = flow.get_zone(listed_zone.id)
            zone_tags = zone.get_settings().get_raw().get("tags") or []

            # Skip zones missing at least one of the required tags.
            if required_tags and not required_tags.issubset(set(zone_tags)):
                continue

            # First dataset of the zone, if any, provides the date.
            first_dataset = next(
                (entry for entry in getattr(zone, "items", []) if isinstance(entry, DSSDataset)),
                None,
            )

            created_on = None
            if first_dataset is not None:
                try:
                    settings = first_dataset.get_settings().get_raw()
                    timestamp = settings.get("creationTag", {}).get("lastModifiedOn")
                    if timestamp:
                        created_on = convert_dku_ts_to_dt(timestamp)
                except Exception:
                    created_on = None

            names.append(listed_zone.name)
            creation_dates.append(created_on)

        # Nothing matched: empty result with the expected columns.
        if not names:
            return pd.DataFrame(columns=["zone_name", "creation_date"])

        result = pd.DataFrame({
            "zone_name": names,
            "creation_date": creation_dates
        })

        # Newest first; zones without a date go to the end.
        result = result.sort_values(
            by="creation_date",
            ascending=False,
            na_position="last"
        )
        return result.reset_index(drop=True)

    except Exception as e:
        logging.exception("Failed to list analysis zones")
        raise ValueError(f"Failed to list analysis zones: {e}")


def clean_analysis_session_flow_zone(zone_name: str, delete_zone = True):
    """Delete every item of a Flow Zone and, optionally, the zone itself.

    Each item is deleted exactly once: datasets with ``drop_data=True`` so
    their data is removed too, every other item type with a plain
    ``delete()``. Previously both calls were attempted on every item, so one
    of the two always failed and logged a spurious error.

    Item deletions are best-effort (failures are logged and the loop
    continues). An error while deleting the zone itself is not caught.

    NOTE(review): the zone is obtained through ``get_or_create_flow_zone``;
    judging by that helper's name, a missing zone may be created only to be
    deleted again - TODO confirm.

    Args:
        zone_name: Name of the Flow Zone to clean.
        delete_zone: When truthy, delete the zone after its items.

    Returns:
        None
    """
    flow_zone = get_or_create_flow_zone(flow_zone_name = zone_name)
    if not flow_zone:
        return None

    for item in flow_zone.items:
        try:
            if isinstance(item, DSSDataset):
                # Datasets: remove the underlying data along with the dataset.
                item.delete(drop_data=True)
            else:
                # Recipes and other zone items take no drop_data argument.
                item.delete()
        except Exception:
            logging.exception(f"Error deleting item in flow zone {zone_name}")

    # Remove the zone itself only when requested.
    if delete_zone:
        flow_zone.delete()

    return None
    
def delete_analysis_zones(n_keep = 5):
    """
    Delete analysis zones beyond the most recent `n_keep`.
    If n_keep <= 0, ALL analysis zones are deleted.

    Zones come from `list_analysis_zones()` and are ordered by their
    `creation_date` column. Progress is reported with `print`.

    NOTE(review): the sort uses pandas' default NaN placement, so zones
    without a creation date end up last and are therefore treated as the
    most recent ones (kept first). Confirm this is intended.

    Args:
        n_keep (int): Number of most recent analysis zones to keep.
    """
    # Order by creation date: oldest first, newest last.
    sorted_zone_names = (
        list_analysis_zones()
        .sort_values("creation_date", ascending=True)["zone_name"]
        .to_list()
    )

    # Nothing listed at all.
    if not sorted_zone_names:
        print("No analysis zones found: nothing to delete.")
        return

    if n_keep <= 0:
        # Keep nothing: every zone goes.
        zones_to_delete = sorted_zone_names
        print(f"Deleting ALL {len(zones_to_delete)} analysis zones...")
    elif len(sorted_zone_names) <= n_keep:
        # Already within the quota.
        print(f"Only {len(sorted_zone_names)} zones found: nothing to delete.")
        return
    else:
        # Drop everything except the last `n_keep` entries of the ordering.
        zones_to_delete = sorted_zone_names[:-n_keep]
        print(f"Cleaning {len(zones_to_delete)} old analysis zones (keeping {n_keep} most recent)...")

    for z_name in zones_to_delete:
        clean_analysis_session_flow_zone(z_name)
        print(f"Deleted analysis zone: {z_name}")