import uuid
import logging

import dataiku

from ..analytic.analytic_functions import (
    analytic_clustering, 
    analytic_time_series_forecasting,
    analytic_outlier_detection,
    analytic_root_cause_analysis
)
from ..utils.common import convert_df_to_str
from ..dku_utils.recipe import generate_code_python

def analytic_clustering_tool(analytic_tool_args: dict) -> str:
    """
    Run a clustering analysis as a generated Python recipe and summarise its outputs.

    A Python recipe calling ``analytic_clustering`` is generated, run, and its two
    output datasets are read back to build a textual report.

    Args:
        analytic_tool_args: dict containing:
            - analysis_session_id: str (passed as the recipe's flow zone name)
            - input_dataset_name: str (required)
            - output_data_with_cluster_dataset_name: str (required, base name;
              a random 8-character suffix is appended to avoid conflicts)
            - output_insight_dataset_name: str (required, base name; suffixed
              the same way)
            - feature_columns: List[str] (default: [])
            - n_clusters: Optional[int] (default: 3 when the key is absent)
            - max_clusters: int (default: 10)
            - random_state: int (default: 42)
            - standardize: bool (default: True)
            - max_categories_for_onehot: int (default: 10)

    Returns:
        str: On success, the insights table and a preview of the clustered data.
        On any failure (missing required argument, failed or aborted job,
        unexpected exception) a message starting with "❌ Error:".
    """

    MAX_PREVIEW_ROWS = 20

    # Fail fast on missing names: otherwise the literal text "None" would end up
    # in the dataset names and in the generated recipe code.
    required_keys = (
        "input_dataset_name",
        "output_data_with_cluster_dataset_name",
        "output_insight_dataset_name",
    )
    missing_keys = [key for key in required_keys if not analytic_tool_args.get(key)]
    if missing_keys:
        return f"❌ Error: missing required argument(s): {', '.join(missing_keys)}"

    # The session id is only read (not removed) from the arguments.
    analysis_session_id = analytic_tool_args.get("analysis_session_id")
    input_dataset_name = analytic_tool_args["input_dataset_name"]

    recipe_name = f"perform_analytic_clustering_{str(uuid.uuid4())[:8]}"

    # Generate new output dataset names with a random suffix to avoid conflicts.
    output_data_with_cluster_dataset_name = (
        f"{analytic_tool_args['output_data_with_cluster_dataset_name']}_{str(uuid.uuid4())[:8]}"
    )
    output_insight_dataset_name = (
        f"{analytic_tool_args['output_insight_dataset_name']}_{str(uuid.uuid4())[:8]}"
    )

    try:
        feature_columns = analytic_tool_args.get("feature_columns", [])
        n_clusters = analytic_tool_args.get("n_clusters", 3)
        max_clusters = analytic_tool_args.get("max_clusters", 10)
        random_state = analytic_tool_args.get("random_state", 42)
        standardize = analytic_tool_args.get("standardize", True)
        max_categories_for_onehot = analytic_tool_args.get("max_categories_for_onehot", 10)

        # Every value is rendered with !r so it reaches the recipe as a Python
        # literal; quotes or newlines inside an argument can no longer break out
        # of the generated source.
        python_code = f"""from agentic_insights.backend.agentic_insights_toolkit.analytic.analytic_functions import analytic_clustering
result = analytic_clustering(
    input_dataset_name = {input_dataset_name!r},
    output_data_with_cluster_dataset_name = {output_data_with_cluster_dataset_name!r},
    output_insight_dataset_name = {output_insight_dataset_name!r},
    feature_columns = {feature_columns!r},
    n_clusters = {n_clusters!r},
    max_clusters = {max_clusters!r},
    random_state = {random_state!r},
    standardize = {standardize!r},
    max_categories_for_onehot = {max_categories_for_onehot!r}
)
"""

        recipe = generate_code_python(
            input_datasets=[input_dataset_name],
            output_datasets=[output_data_with_cluster_dataset_name, output_insight_dataset_name],
            python_code=python_code,
            recipe_name=recipe_name,
            flow_zone_name=analysis_session_id,
        )

        job = recipe.run(no_fail=True)
        job_status = job.get_status()

        # An aborted job leaves the outputs unbuilt just like a failed one.
        if job_status['baseStatus']['state'] in ("FAILED", "ABORTED"):
            return (
                f"❌ Error: clustering failed.\n"
                f"➡ Issue: {job_status.get('errorMessage', 'Unknown error')}"
            )

        # Load clustering results
        df_with_clusters = dataiku.Dataset(output_data_with_cluster_dataset_name).get_dataframe(infer_with_pandas=False)
        df_with_clusters_preview = df_with_clusters.head(MAX_PREVIEW_ROWS)
        df_with_clusters_preview_str = convert_df_to_str(df_with_clusters_preview)

        # Load insights - display full table
        df_insights = dataiku.Dataset(output_insight_dataset_name).get_dataframe(infer_with_pandas=False)
        df_insights_str = convert_df_to_str(df_insights)

        output_parts = [
            # Header
            f"Clustering applied on '{input_dataset_name}'.\n",
            # Dataset information
            f"==> Detailed data with clusters written to '{output_data_with_cluster_dataset_name}' dataset ({len(df_with_clusters)} rows)",
            # Metrics/Insights table
            f"==> Metrics/Insights written to '{output_insight_dataset_name}' Dataset:",
            f"{df_insights_str}",
            "",
            # Preview of detailed data
            f"==> Preview of detailed data with clusters (first {len(df_with_clusters_preview)} of {len(df_with_clusters)} rows):",
            f"{df_with_clusters_preview_str}",
        ]

        return "\n".join(output_parts)

    except Exception as e:
        # Tool boundary: report the problem as text instead of raising.
        logging.exception("Error in analytic_clustering_tool")
        return f"❌ Error: {str(e)}"


def analytic_time_series_forecasting_tool(analytic_tool_args: dict) -> str:
    """
    Run a time series forecast as a generated Python recipe and summarise its outputs.

    A Python recipe calling ``analytic_time_series_forecasting`` is generated, run,
    and its two output datasets are read back to build a textual report.

    Args:
        analytic_tool_args: dict containing:
            - analysis_session_id: str (passed as the recipe's flow zone name)
            - input_dataset_name: str (required)
            - output_data_with_forecast_dataset_name: str (required, base name;
              a random 8-character suffix is appended to avoid conflicts)
            - output_insight_dataset_name: str (required, base name; suffixed
              the same way)
            - date_column: str (required)
            - value_column: str (required; single column, not a list)
            - split_column: Optional[str] (to split forecast by a column if needed;
              any falsy value is passed on as None)
            - aggregation_freq: str (required, e.g., "D", "W", "M", "Q", "Y")
            - aggregation_method: str (e.g., "mean", "sum", "median"; default: "mean")
            - forecast_horizon: int (number of periods to forecast; default: 12)

    Returns:
        str: On success, the insights table and a preview of the forecast data.
        On any failure (missing required argument, failed or aborted job,
        unexpected exception) a message starting with "❌ Error:".
    """

    MAX_PREVIEW_ROWS = 2000

    # Fail fast on missing values: otherwise the literal text "None" would be
    # used as a dataset or column name in the generated recipe code.
    required_keys = (
        "input_dataset_name",
        "output_data_with_forecast_dataset_name",
        "output_insight_dataset_name",
        "date_column",
        "value_column",
        "aggregation_freq",
    )
    missing_keys = [key for key in required_keys if not analytic_tool_args.get(key)]
    if missing_keys:
        return f"❌ Error: missing required argument(s): {', '.join(missing_keys)}"

    # The session id is only read (not removed) from the arguments.
    analysis_session_id = analytic_tool_args.get("analysis_session_id")
    input_dataset_name = analytic_tool_args["input_dataset_name"]

    recipe_name = f"perform_analytic_time_series_forecasting_{str(uuid.uuid4())[:8]}"

    # Generate new output dataset names with a random suffix to avoid conflicts.
    output_data_with_forecast_dataset_name = (
        f"{analytic_tool_args['output_data_with_forecast_dataset_name']}_{str(uuid.uuid4())[:8]}"
    )
    output_insight_dataset_name = (
        f"{analytic_tool_args['output_insight_dataset_name']}_{str(uuid.uuid4())[:8]}"
    )

    try:
        # Prepare arguments for the function call
        date_column = analytic_tool_args["date_column"]
        value_column = analytic_tool_args["value_column"]
        aggregation_freq = analytic_tool_args["aggregation_freq"]
        aggregation_method = analytic_tool_args.get("aggregation_method", "mean")
        forecast_horizon = analytic_tool_args.get("forecast_horizon", 12)
        # Empty or absent split column means "no split" and is rendered as None.
        split_column = analytic_tool_args.get("split_column") or None

        # Every value is rendered with !r so it reaches the recipe as a Python
        # literal; quotes or newlines inside an argument can no longer break out
        # of the generated source.
        python_code = f"""from agentic_insights.backend.agentic_insights_toolkit.analytic.analytic_functions import analytic_time_series_forecasting
result = analytic_time_series_forecasting(
    input_dataset_name = {input_dataset_name!r},
    output_data_with_forecast_dataset_name = {output_data_with_forecast_dataset_name!r},
    output_insight_dataset_name = {output_insight_dataset_name!r},
    date_column = {date_column!r},
    value_column = {value_column!r},
    aggregation_freq = {aggregation_freq!r},
    aggregation_method = {aggregation_method!r},
    forecast_horizon = {forecast_horizon!r},
    split_column = {split_column!r}
)
"""

        recipe = generate_code_python(
            input_datasets=[input_dataset_name],
            output_datasets=[output_data_with_forecast_dataset_name, output_insight_dataset_name],
            python_code=python_code,
            recipe_name=recipe_name,
            flow_zone_name=analysis_session_id,
        )

        job = recipe.run(no_fail=True)
        job_status = job.get_status()

        # An aborted job leaves the outputs unbuilt just like a failed one.
        if job_status['baseStatus']['state'] in ("FAILED", "ABORTED"):
            return (
                f"❌ Error: time series forecasting failed.\n"
                f"➡ Issue: {job_status.get('errorMessage', 'Unknown error')}"
            )

        # Load forecasting results
        df_with_forecast = dataiku.Dataset(output_data_with_forecast_dataset_name).get_dataframe(infer_with_pandas=False)
        df_with_forecast_preview = df_with_forecast.head(MAX_PREVIEW_ROWS)
        df_with_forecast_preview_str = convert_df_to_str(df_with_forecast_preview, use_toon=True)

        # Load insights - display full table
        df_insights = dataiku.Dataset(output_insight_dataset_name).get_dataframe(infer_with_pandas=False)
        df_insights_str = convert_df_to_str(df_insights)

        output_parts = [
            # Header
            f"Time Series Forecasting applied on '{input_dataset_name}'.\n",
            # Dataset information
            f"==> Detailed data with forecasts written to '{output_data_with_forecast_dataset_name}' dataset ({len(df_with_forecast)} rows)",
            # Metrics/Insights table
            f"==> Metrics/Insights written to '{output_insight_dataset_name}' Dataset:",
            f"{df_insights_str}",
            "",
            # Preview of detailed data
            f"==> Preview of detailed data with forecasts (first {len(df_with_forecast_preview)} of {len(df_with_forecast)} rows):",
            f"{df_with_forecast_preview_str}",
        ]

        return "\n".join(output_parts)

    except Exception as e:
        # Tool boundary: report the problem as text instead of raising.
        logging.exception("Error in analytic_time_series_forecasting_tool")
        return f"❌ Error: {str(e)}"


def analytic_outlier_detection_tool(analytic_tool_args: dict) -> str:
    """
    Run an outlier detection as a generated Python recipe and summarise its outputs.

    A Python recipe calling ``analytic_outlier_detection`` is generated, run, and
    its two output datasets are read back to build a textual report.

    Args:
        analytic_tool_args: dict containing:
            - analysis_session_id: str (passed as the recipe's flow zone name)
            - input_dataset_name: str (required)
            - output_data_with_outliers_dataset_name: str (required, base name;
              a random 8-character suffix is appended to avoid conflicts)
            - output_insight_dataset_name: str (required, base name; suffixed
              the same way)
            - feature_columns: List[str] (default: [])
            - contamination: float (default: 0.05)

    Returns:
        str: On success, the insights table and a preview of the outlier data.
        On any failure (missing required argument, failed or aborted job,
        unexpected exception) a message starting with "❌ Error:".
    """

    MAX_PREVIEW_ROWS = 20

    # Fail fast on missing names: otherwise the literal text "None" would end up
    # in the dataset names and in the generated recipe code.
    required_keys = (
        "input_dataset_name",
        "output_data_with_outliers_dataset_name",
        "output_insight_dataset_name",
    )
    missing_keys = [key for key in required_keys if not analytic_tool_args.get(key)]
    if missing_keys:
        return f"❌ Error: missing required argument(s): {', '.join(missing_keys)}"

    analysis_session_id = analytic_tool_args.get("analysis_session_id")
    input_dataset_name = analytic_tool_args["input_dataset_name"]

    recipe_name = f"perform_analytic_outlier_detection_{str(uuid.uuid4())[:8]}"

    # Generate new output dataset names with a random suffix to avoid conflicts.
    output_data_with_outliers_dataset_name = (
        f"{analytic_tool_args['output_data_with_outliers_dataset_name']}_{str(uuid.uuid4())[:8]}"
    )
    output_insight_dataset_name = (
        f"{analytic_tool_args['output_insight_dataset_name']}_{str(uuid.uuid4())[:8]}"
    )

    try:
        feature_columns = analytic_tool_args.get("feature_columns", [])
        contamination = analytic_tool_args.get("contamination", 0.05)

        # Every value is rendered with !r so it reaches the recipe as a Python
        # literal; quotes or newlines inside an argument can no longer break out
        # of the generated source.
        python_code = f"""from agentic_insights.backend.agentic_insights_toolkit.analytic.analytic_functions import analytic_outlier_detection
result = analytic_outlier_detection(
    input_dataset_name = {input_dataset_name!r},
    output_data_with_outliers_dataset_name = {output_data_with_outliers_dataset_name!r},
    output_insight_dataset_name = {output_insight_dataset_name!r},
    feature_columns = {feature_columns!r},
    contamination = {contamination!r}
)
"""

        recipe = generate_code_python(
            input_datasets=[input_dataset_name],
            output_datasets=[output_data_with_outliers_dataset_name, output_insight_dataset_name],
            python_code=python_code,
            recipe_name=recipe_name,
            flow_zone_name=analysis_session_id,
        )

        job = recipe.run(no_fail=True)
        job_status = job.get_status()

        # An aborted job leaves the outputs unbuilt just like a failed one.
        if job_status['baseStatus']['state'] in ("FAILED", "ABORTED"):
            return (
                f"❌ Error: outlier detection failed.\n"
                f"➡ Issue: {job_status.get('errorMessage', 'Unknown error')}"
            )

        # Load outlier detection results
        df_with_outliers = dataiku.Dataset(output_data_with_outliers_dataset_name).get_dataframe(infer_with_pandas=False)
        df_with_outliers_preview = df_with_outliers.head(MAX_PREVIEW_ROWS)
        df_with_outliers_preview_str = convert_df_to_str(df_with_outliers_preview)

        # Load insights - display full table
        df_insights = dataiku.Dataset(output_insight_dataset_name).get_dataframe(infer_with_pandas=False)
        df_insights_str = convert_df_to_str(df_insights)

        output_parts = [
            # Header
            f"Outlier Detection applied on '{input_dataset_name}'.\n",
            # Dataset information
            f"==> Detailed data with outliers written to '{output_data_with_outliers_dataset_name}' dataset ({len(df_with_outliers)} rows)",
            # Metrics/Insights table
            f"==> Metrics/Insights written to '{output_insight_dataset_name}' Dataset:",
            f"{df_insights_str}",
            "",
            # Preview of detailed data
            f"==> Preview of detailed data with outliers (first {len(df_with_outliers_preview)} of {len(df_with_outliers)} rows):",
            f"{df_with_outliers_preview_str}",
        ]

        return "\n".join(output_parts)

    except Exception as e:
        # Tool boundary: report the problem as text instead of raising.
        logging.exception("Error in analytic_outlier_detection_tool")
        return f"❌ Error: {str(e)}"


def analytic_root_cause_analysis_tool(analytic_tool_args: dict) -> str:
    """
    Run a root cause analysis as a generated Python recipe and summarise its output.

    A Python recipe calling ``analytic_root_cause_analysis`` is generated, run, and
    its insight dataset is read back to build a textual report.

    Args:
        analytic_tool_args: dict containing:
            - analysis_session_id: str (passed as the recipe's flow zone name)
            - input_dataset_name: str (required)
            - output_insight_dataset_name: str (required, base name; a random
              8-character suffix is appended to avoid conflicts)
            - target_column: str (required)
            - feature_columns: List[str] (default: [])
            - max_interactions: int (default: 5)
            - min_correlation_threshold: float (default: 0.1)

    Returns:
        str: On success, the target column and the full insights table.
        On any failure (missing required argument, failed or aborted job,
        unexpected exception) a message starting with "❌ Error:".
    """

    # Fail fast on missing values: otherwise the literal text "None" would be
    # used as a dataset or column name in the generated recipe code.
    required_keys = (
        "input_dataset_name",
        "output_insight_dataset_name",
        "target_column",
    )
    missing_keys = [key for key in required_keys if not analytic_tool_args.get(key)]
    if missing_keys:
        return f"❌ Error: missing required argument(s): {', '.join(missing_keys)}"

    analysis_session_id = analytic_tool_args.get("analysis_session_id")
    input_dataset_name = analytic_tool_args["input_dataset_name"]
    target_column = analytic_tool_args["target_column"]

    recipe_name = f"perform_analytic_root_cause_analysis_{str(uuid.uuid4())[:8]}"

    # Generate a new output dataset name with a random suffix to avoid conflicts.
    output_insight_dataset_name = (
        f"{analytic_tool_args['output_insight_dataset_name']}_{str(uuid.uuid4())[:8]}"
    )

    try:
        feature_columns = analytic_tool_args.get("feature_columns", [])
        max_interactions = analytic_tool_args.get("max_interactions", 5)
        min_correlation_threshold = analytic_tool_args.get("min_correlation_threshold", 0.1)

        # Every value is rendered with !r so it reaches the recipe as a Python
        # literal; quotes or newlines inside an argument can no longer break out
        # of the generated source.
        python_code = f"""from agentic_insights.backend.agentic_insights_toolkit.analytic.analytic_functions import analytic_root_cause_analysis
result = analytic_root_cause_analysis(
    input_dataset_name = {input_dataset_name!r},
    output_insight_dataset_name = {output_insight_dataset_name!r},
    target_column = {target_column!r},
    feature_columns = {feature_columns!r},
    max_interactions = {max_interactions!r},
    min_correlation_threshold = {min_correlation_threshold!r}
)
"""

        recipe = generate_code_python(
            input_datasets=[input_dataset_name],
            output_datasets=[output_insight_dataset_name],
            python_code=python_code,
            recipe_name=recipe_name,
            flow_zone_name=analysis_session_id,
        )

        job = recipe.run(no_fail=True)
        job_status = job.get_status()

        # An aborted job leaves the output unbuilt just like a failed one.
        if job_status['baseStatus']['state'] in ("FAILED", "ABORTED"):
            return (
                f"❌ Error: root cause analysis failed.\n"
                f"➡ Issue: {job_status.get('errorMessage', 'Unknown error')}"
            )

        # Load insights - display full table
        df_insights = dataiku.Dataset(output_insight_dataset_name).get_dataframe(infer_with_pandas=False)
        df_insights_str = convert_df_to_str(df_insights)

        output_parts = [
            # Header
            f"Root Cause Analysis applied on '{input_dataset_name}'.\n",
            f"==> Target column: '{target_column}'",
            # Metrics/Insights table
            f"==> Metrics/Insights written to '{output_insight_dataset_name}' Dataset:",
            f"{df_insights_str}",
        ]

        return "\n".join(output_parts)

    except Exception as e:
        # Tool boundary: report the problem as text instead of raising.
        logging.exception("Error in analytic_root_cause_analysis_tool")
        return f"❌ Error: {str(e)}"

