import json
import logging

import pandas as pd

from dataiku.llm.evaluation.exceptions import PromptRecipeInputException
from dataiku.llm.evaluation.utils import failure_utils
from dataiku.llm.evaluation.utils.common import PROMPT_RECIPE_RAW_QUERY_NAME, PROMPT_RECIPE_RAW_RESPONSE_NAME, _get_trajectory

logger = logging.getLogger(__name__)


def _read_prompt_output_from_json_cell(json_string: str):
    try:
        return json.loads(json_string).get('text')
    except:
        return None


def _reconstruct_prompt_input_from_json_cell(json_string: str):
    try:
        raw_json = json.loads(json_string)
        chat_messages = raw_json.get('messages', [])
        user_messages = [m for m in chat_messages if m.get('role') == "user"][0]
        if any(e.get('type') in ['IMAGE_INLINE', 'IMAGE_REF'] for e in user_messages.get("parts", [])):
            raise PromptRecipeInputException('Can\'t handle an image input from prompt recipe.')
        return '\n'.join(m.get('role') + ': ' + m.get('content') for m in chat_messages)
    except PromptRecipeInputException:
        raise
    except:
        return None


def _read_context_from_json_cell(json_string: str):
    try:
        raw_json = json.loads(json_string)
        sources = raw_json.get('sources', [])
        excerpts = [s.get('excerpt', {}) for s in sources]
        if any(e.get('type') == 'IMAGE_REF' for e in excerpts):
            contexts = sources  # The parsing will be done later because we don't want the encoded image in the parsed context
        else:
            contexts = [e.get('text') for e in excerpts if e.get('type') == 'TEXT']
        return [c for c in contexts if c]
    except Exception as e:
        return []


def _read_tool_calls_from_json_cell(json_string: str):
    """
    Extract the tool calls from one raw response cell.
    :param json_string: content of the cell, handed over as is to _get_trajectory
    :return: the elements of the trajectory's "agentLoop" whose "type" is "TOOLCALL",
             or an empty list when _get_trajectory returns nothing usable
    """
    trajectory = _get_trajectory(json_string)
    if trajectory:
        return [step for step in trajectory.get("agentLoop", []) if step.get("type") == "TOOLCALL"]
    return []

def _read_trajectory_from_json_cell(json_string: str):
    try:
        raw_json = json.loads(json_string)
    except Exception as e:
        logger.warning(f"Can't read trajectory {json_string}, it won't be available: {e}")
        return {}
    parsed_trajectory = raw_json.get('additionalInformation', {}).get('trajectory', {})
    return parsed_trajectory

def has_raw_response(input_df: pd.DataFrame) -> bool:
    """
    Tell whether input_df holds the raw response column of a Prompt Recipe.
    A warning is emitted through failure_utils when the column is absent.
    :param input_df: dataframe to inspect
    :return: True if the raw response column is present, False otherwise
    """
    raw_response_column = input_df.get(PROMPT_RECIPE_RAW_RESPONSE_NAME)
    if raw_response_column is not None:
        return True
    # Only warn, never raise: a missing answer column is unusual, yet some metrics can do without it.
    failure_utils.warn(f'Can\'t find column "{PROMPT_RECIPE_RAW_RESPONSE_NAME}". Check that your input dataset was produced by a prompt recipe with "Raw response output mode" set to "Raw" or "Raw without traces".')
    return False


def try_get_reconstructed_prompt_recipe_input(input_df: pd.DataFrame) -> pd.Series:
    """
    Try to parse and reconstruct input from a PromptRecipe's "raw" input (Raw Query)
    :param input_df: dataframe expected to hold the raw query column
    :return: pd.Series with the reconstructed input, one item per row (None for rows that could not be parsed)
    :raises PromptRecipeInputException: if the raw query column is missing, if no row yields an input,
             or if a row holds an image input
    """
    raw_query = input_df.get(PROMPT_RECIPE_RAW_QUERY_NAME)
    if raw_query is None:
        # input is mandatory
        # The message must name the raw query column: it is the one that is missing here.
        raise PromptRecipeInputException(f'Can\'t find column "{PROMPT_RECIPE_RAW_QUERY_NAME}". Check that your input dataset was produced by a prompt recipe with "Raw query output mode" set to "Raw".')
    logger.info(f'Column "{PROMPT_RECIPE_RAW_QUERY_NAME}" is from a prompt recipe, trying to parse it for inputs')
    inputs_from_prompt_recipe = raw_query.apply(_reconstruct_prompt_input_from_json_cell)
    if not any(inputs_from_prompt_recipe):
        # input is mandatory
        raise PromptRecipeInputException(f'Can\'t find inputs from a prompt recipe in "{PROMPT_RECIPE_RAW_QUERY_NAME}". Check that your input dataset was produced by a prompt recipe with "Raw query output mode" set to "Raw".')
    return inputs_from_prompt_recipe


def try_get_parsed_prompt_recipe_output(input_df: pd.DataFrame) -> pd.Series:
    """
    Try to parse output from a PromptRecipe's "raw" output (Raw Response)
    :param input_df: dataframe holding the raw response column
    :return: pd.Series with the parsed output, one item per row, or an empty Series if no row yields an output
    """
    # The column being parsed is the raw response one, so this is the one to mention in the log.
    logger.info(f'Column "{PROMPT_RECIPE_RAW_RESPONSE_NAME}" is from a prompt recipe, trying to parse it for output')
    outputs_from_prompt_recipe = input_df[PROMPT_RECIPE_RAW_RESPONSE_NAME].apply(_read_prompt_output_from_json_cell)
    if any(outputs_from_prompt_recipe):
        logger.info(f'Found outputs in "{PROMPT_RECIPE_RAW_RESPONSE_NAME}", from a prompt recipe. Parsing it.')
        return outputs_from_prompt_recipe
    # Do not raise but produce a warning. Not having answers is strange, but some metrics don't need it.
    failure_utils.warn(f'Can\'t find outputs from a prompt recipe in "{PROMPT_RECIPE_RAW_RESPONSE_NAME}". Did your LLM produce answers ?')
    return pd.Series(dtype=object)


def try_get_parsed_prompt_recipe_context(input_df: pd.DataFrame, is_context_needed: bool) -> pd.Series:
    """
    Try to extract the contexts/sources from a PromptRecipe's "raw" output (Raw Response)
    :param input_df: dataframe holding the raw response column
    :param is_context_needed: True if some non-custom metric need the context; forwarded as raise_diagnostic
                              to failure_utils.warn when no context is found
    :return: pd.Series, one item per input row. Each item is the list of contexts parsed for that row
    """
    logger.info(f'Column "{PROMPT_RECIPE_RAW_RESPONSE_NAME}" is from a prompt recipe, trying to parse it for context/sources')
    raw_responses = input_df[PROMPT_RECIPE_RAW_RESPONSE_NAME]
    parsed_contexts = raw_responses.apply(_read_context_from_json_cell)
    if not any(parsed_contexts):
        # Not a single row has a non-empty list of contexts
        failure_utils.warn(f'Column "{PROMPT_RECIPE_RAW_RESPONSE_NAME}" does not contain parsable contexts/sources from a prompt recipe (it might be empty), won\'t parse it.',
                           raise_diagnostic=is_context_needed)
        return parsed_contexts
    logger.info(f'Found contexts/sources in "{PROMPT_RECIPE_RAW_RESPONSE_NAME}", from a prompt recipe. Parsing it.')
    return parsed_contexts


def try_get_parsed_prompt_recipe_tool_calls(input_df: pd.DataFrame, are_tools_needed: bool) -> pd.Series:
    """
    Try to parse tool calls from a PromptRecipe's "raw" output (Raw Response)
    :param input_df: dataframe holding the raw response column
    :param are_tools_needed: True if some non-custom metric need the tools; forwarded as raise_diagnostic
                             to failure_utils.warn when no tool call is found
    :return: pd.Series, one item per input row. Each item is a list of tool calls
    """
    logger.info(f'Column "{PROMPT_RECIPE_RAW_RESPONSE_NAME}" is from a prompt recipe, trying to parse it for tool calls')
    tool_calls_from_prompt_recipe = input_df[PROMPT_RECIPE_RAW_RESPONSE_NAME].apply(_read_tool_calls_from_json_cell)
    # Rely on truthiness: the per-row reader yields a (possibly empty) list for every row, so testing
    # "is not None" would always succeed and the warning below would never be emitted.
    if any(tool_calls_from_prompt_recipe):
        logger.info(f'Found tool calls in "{PROMPT_RECIPE_RAW_RESPONSE_NAME}", from a prompt recipe. Parsing it.')
    else:
        failure_utils.warn(f'Column "{PROMPT_RECIPE_RAW_RESPONSE_NAME}" does not contain parsable tool calls from a prompt recipe (it might be empty), won\'t parse it.',
                           raise_diagnostic=are_tools_needed)
    return tool_calls_from_prompt_recipe

def try_get_parsed_prompt_recipe_trajectory(input_df: pd.DataFrame) -> pd.Series:
    """
    Try to parse trajectory from a PromptRecipe's "raw" output (Raw Response)
    :param input_df: dataframe holding the raw response column
    :return: pd.Series, one item per input row. Each item is a trajectory (empty when none could be read)
    """
    logger.info(f'Column "{PROMPT_RECIPE_RAW_RESPONSE_NAME}" is from a prompt recipe, trying to parse it for trajectory')
    trajectory_from_prompt_recipe = input_df[PROMPT_RECIPE_RAW_RESPONSE_NAME].apply(_read_trajectory_from_json_cell)
    # Rely on truthiness: the per-row reader falls back to an empty dict rather than None, so testing
    # "is not None" would consider unparsable rows as found trajectories and never emit the warning below.
    if any(trajectory_from_prompt_recipe):
        logger.info(f'Found trajectory in "{PROMPT_RECIPE_RAW_RESPONSE_NAME}", from a prompt recipe. Parsing it.')
    else:
        failure_utils.warn(f'Column "{PROMPT_RECIPE_RAW_RESPONSE_NAME}" does not contain a parsable trajectory from a prompt recipe (it might be empty), won\'t parse it.')
    return trajectory_from_prompt_recipe

