import json
import logging
from typing import Tuple

import pandas as pd

from dataiku.llm.evaluation.exceptions import PromptRecipeInputException
from dataiku.llm.evaluation.utils import failure_utils

logger = logging.getLogger(__name__)

# Names of the dataset columns this module reads the prompt recipe's raw query and raw response from.
# Keep both in sync with PromptRecipeSchemaComputer.java
PROMPT_RECIPE_RAW_QUERY_NAME = 'llm_raw_query'   # column holding the JSON raw query (chat messages)
PROMPT_RECIPE_RAW_RESPONSE_NAME = 'llm_raw_response'   # column holding the JSON raw response (text, sources)


def _read_prompt_output_from_json_cell(json_string: str):
    try:
        return json.loads(json_string).get('text')
    except:
        return None


def _reconstruct_prompt_input_from_json_cell(json_string: str):
    try:
        raw_json = json.loads(json_string)
        chat_messages = raw_json.get('messages', [])
        user_messages = [m for m in chat_messages if m.get('role') == "user"][0]
        if any(e.get('type') in ['IMAGE_INLINE', 'IMAGE_REF'] for e in user_messages.get("parts", [])):
            raise PromptRecipeInputException('Can\'t handle an image input from prompt recipe.')
        return '\n'.join(m.get('role') + ': ' + m.get('content') for m in chat_messages)
    except PromptRecipeInputException:
        raise
    except:
        return None


def _read_context_from_json_cell(json_string: str):
    try:
        raw_json = json.loads(json_string)
        sources = raw_json.get('sources', [])
        excerpts = [s.get('excerpt', {}) for s in sources]
        if any(e.get('type') == 'IMAGE_REF' for e in excerpts):
            contexts = sources  # The parsing will be done later because we don't want the encoded image in the parsed context
        else:
            contexts = [e.get('text') for e in excerpts if e.get('type') == 'TEXT']
        return [c for c in contexts if c]
    except Exception as e:
        return []


def has_raw_response(input_df: pd.DataFrame) -> bool:
    """
    Verify that the input_df contains the raw response from the Prompt Recipe
    :param input_df: dataset to inspect
    :return: True if the raw response column is present, False (after emitting a warning) otherwise
    """
    # Test column membership: indexing a DataFrame with a missing column raises KeyError, it never returns None.
    if PROMPT_RECIPE_RAW_RESPONSE_NAME in input_df.columns:
        return True
    # Do not raise but produce a warning. Not having answers is strange, but some metrics don't need it.
    failure_utils.warn('Can\'t find column "%s". Check that your input dataset was produced by a prompt recipe with "Raw response output mode" set to "Raw" or "Raw without traces".' % PROMPT_RECIPE_RAW_RESPONSE_NAME)
    return False


def try_get_reconstructed_prompt_recipe_input(input_df: pd.DataFrame) -> pd.Series:
    """
    Try to parse and reconstruct input from a PromptRecipe's "raw" input (Raw Query)
    :param input_df: dataset expected to contain the raw query column
    :return: pd.Series with the reconstructed input (None for the rows that could not be parsed)
    :raises PromptRecipeInputException: if the raw query column is missing, if no row yields an input, or if a row
            has an image input
    """
    # Test column membership: indexing a DataFrame with a missing column raises KeyError, it never returns None.
    if PROMPT_RECIPE_RAW_QUERY_NAME not in input_df.columns:
        # input is mandatory
        raise PromptRecipeInputException('Can\'t find column "%s". Check that your input dataset was produced by a prompt recipe with "Raw query output mode" set to "Raw".' % PROMPT_RECIPE_RAW_QUERY_NAME)
    logger.info('Column "%s" is from a prompt recipe, trying to parse it for inputs', PROMPT_RECIPE_RAW_QUERY_NAME)
    inputs_from_prompt_recipe = input_df[PROMPT_RECIPE_RAW_QUERY_NAME].apply(_reconstruct_prompt_input_from_json_cell)
    if not any(inputs_from_prompt_recipe):
        # input is mandatory
        raise PromptRecipeInputException('Can\'t find inputs from a prompt recipe in "%s". Check that your input dataset was produced by a prompt recipe with "Raw query output mode" set to "Raw".' % PROMPT_RECIPE_RAW_QUERY_NAME)
    return inputs_from_prompt_recipe


def try_get_parsed_prompt_recipe_output(input_df: pd.DataFrame) -> pd.Series:
    """
    Try to parse output from a PromptRecipe's "raw" output (Raw Response)
    :param input_df: dataset containing the raw response column
    :return: pd.Series with the parsed output or empty Series if the parsing is unsuccessful.
    """
    # The outputs are read from the raw response column, so that is the column to report in the logs.
    logger.info('Column "%s" is from a prompt recipe, trying to parse it for output', PROMPT_RECIPE_RAW_RESPONSE_NAME)
    outputs_from_prompt_recipe = input_df[PROMPT_RECIPE_RAW_RESPONSE_NAME].apply(_read_prompt_output_from_json_cell)
    if any(outputs_from_prompt_recipe):
        logger.info('Found outputs in "%s", from a prompt recipe. Parsing it.', PROMPT_RECIPE_RAW_RESPONSE_NAME)
        return outputs_from_prompt_recipe
    # Do not raise but produce a warning. Not having answers is strange, but some metrics don't need it.
    failure_utils.warn('Can\'t find outputs from a prompt recipe in "%s". Did your LLM produce answers ?' % PROMPT_RECIPE_RAW_RESPONSE_NAME)
    return pd.Series(dtype=object)


def try_get_parsed_prompt_recipe_context(input_df: pd.DataFrame, is_context_needed: bool) -> pd.Series:
    """
    Try to parse contexts from a PromptRecipe's "raw" output (Raw Response)
    :param input_df: dataset containing the raw response column
    :param is_context_needed: True if some non-custom metric need the context
    :return: pd.Series holding, for each row, the list of parsed contexts (an empty list if nothing could be parsed).
             Only the Series is returned: no multimodal flag is computed here.
    """
    logger.info('Column "%s" is from a prompt recipe, trying to parse it for context/sources', PROMPT_RECIPE_RAW_RESPONSE_NAME)
    contexts_from_prompt_recipe = input_df[PROMPT_RECIPE_RAW_RESPONSE_NAME].apply(_read_context_from_json_cell)
    # Each cell is a list, so any() is True as soon as one row has at least one context.
    if any(contexts_from_prompt_recipe):
        logger.info('Found contexts/sources in "%s", from a prompt recipe. Parsing it.', PROMPT_RECIPE_RAW_RESPONSE_NAME)
    else:
        failure_utils.warn('Column "%s" does not contain parsable contexts/sources from a prompt recipe (it might be empty), won\'t parse it.' % PROMPT_RECIPE_RAW_RESPONSE_NAME,
                           raise_diagnostic=is_context_needed)
    return contexts_from_prompt_recipe
