import base64
import time
from io import BytesIO
from typing import Dict, List, Union

import dataiku
from common.backend.constants import (
    DOCUMENT_EXTENSIONS,
    IMAGE_EXTENSIONS,
)
from common.backend.models.base import (
    MediaSummary,
    UploadChainTypes,
    UploadFileError,
    UploadFileResponse,
)
from common.backend.utils.dataiku_api import dataiku_api
from common.backend.utils.file_extraction.pdf import extract_pdf_text, first_page_to_preview
from common.backend.utils.file_extraction.pptx.pptx_utils import (
    extract_pptx_slides_as_imgs,
    extract_pptx_text,
    save_pptx_slides_next_to_original_document,
)
from common.backend.utils.file_extraction.text import extract_docx_text, extract_plain_text
from common.backend.utils.file_utils import allowed_file, get_file_data
from common.backend.utils.llm_utils import get_llm_capabilities
from common.backend.utils.picture_utils import resize_with_ratio
from common.backend.utils.upload_utils import get_checked_config, save_extracted_json
from common.llm_assist.logging import logger
from PIL import Image
from werkzeug.datastructures import FileStorage
from werkzeug.utils import secure_filename

# Module-level settings, resolved once at import time.
# Webapp configuration mapping; save_uploads reads its "upload_folder" entry.
webapp_config: Dict[str, str] = dataiku_api.webapp_config
# Maximum number of files save_uploads accepts in a single call.
max_n_files: int = int(get_checked_config("max_n_upload_files"))
# Value of the "multi_modal" entry returned by get_llm_capabilities()
# (False when the entry is absent); forwarded to allowed_file.
multi_modal: bool = get_llm_capabilities().get("multi_modal", False)

def save_uploads(files: List[FileStorage], auth_identifier: str) -> UploadFileResponse:
    """Store uploaded files in the upload folder and summarise each of them.

    For every file the content is read, a preview and/or extracted text is
    produced depending on the extension, the original file is uploaded to
    ``<auth_identifier>/<timestamp>_<secured file name>`` in the folder named
    by the ``upload_folder`` webapp setting, and a JSON metadata file is
    written through ``save_extracted_json``.

    Args:
        files: Uploaded files to process, in order.
        auth_identifier: First path segment under which the files are stored.

    Returns:
        A response whose ``"media_summaries"`` list holds one summary per
        file, with ``metadata_path`` set and ``full_extracted_text`` removed.

    Raises:
        Exception: The message is an ``UploadFileError`` value:
            ``TOO_MANY_FILES`` when more than ``max_n_files`` files are given,
            ``NO_SELECTED_FILE`` when a file has no name,
            ``INVALID_FILE_TYPE`` when the extension is not handled here, and
            ``GENERIC_ERROR`` (chained to the underlying error) when reading,
            extracting or storing a file fails. Anything raised by
            ``allowed_file`` propagates unchanged.
    """
    extractions: UploadFileResponse = {"media_summaries": []}
    if len(files) > max_n_files:
        raise Exception(UploadFileError.TOO_MANY_FILES.value)
    for file in files:
        if not file.filename:
            raise Exception(UploadFileError.NO_SELECTED_FILE.value)
        extension = allowed_file(file, multi_modal)
        # Reject unsupported types before entering the try block: raising
        # inside it would be rewritten to GENERIC_ERROR by the handler below.
        is_supported = (
            extension in IMAGE_EXTENSIONS
            or extension in ("pdf", "pptx")
            or extension in DOCUMENT_EXTENSIONS
        )
        if not is_supported:
            raise Exception(UploadFileError.INVALID_FILE_TYPE.value)
        try:
            secure_name = secure_filename(file.filename)
            # NOTE(review): the timestamp has one-second resolution, so two
            # files with the same name processed within the same second end
            # up with the same file_path.
            file_name = secure_filename(f"{int(time.time())}_{secure_name}")
            logger.debug(f"Uploading file name: {file_name}")
            file_path = f"{auth_identifier}/{file_name}"
            file_data: bytes = get_file_data(file)
            preview = None
            extracted_text = None
            extracted_images_path = []
            if extension in IMAGE_EXTENSIONS:
                # Images carry no extracted text; the preview is the image
                # after resize_with_ratio, re-encoded as a PNG data URI.
                image = Image.open(BytesIO(file_data))
                resized_image = resize_with_ratio(image)
                buffered = BytesIO()
                resized_image.save(buffered, format="PNG")
                b64_image = base64.b64encode(buffered.getvalue()).decode("utf-8")
                preview = f"data:image/png;base64,{b64_image}"
                chain_type = UploadChainTypes.IMAGE.value
            elif extension == "pdf":
                extracted_text, is_doc_as_image = extract_pdf_text(file_data)
                preview = f"data:image/png;base64,{first_page_to_preview(file_data)}"
                if is_doc_as_image:
                    chain_type = UploadChainTypes.DOCUMENT_AS_IMAGE.value
                else:
                    # TODO: add in memory RAG HERE when needed UploadChainTypes.LONG_DOCUMENT.value
                    chain_type = UploadChainTypes.SHORT_DOCUMENT.value
            elif extension == "pptx":
                b64_images = extract_pptx_slides_as_imgs(file_path, file_data)
                extracted_text, is_doc_as_image = extract_pptx_text(file_data, extension, len(b64_images))
                extracted_images_path = save_pptx_slides_next_to_original_document(b64_images, file_path)
                extracted_text = (
                    f"File : {file_name} composed of {len(b64_images)} slides. "
                    f"Extracted content:\n{extracted_text}"
                )
                # A deck without any rendered slide simply has no preview
                # instead of failing on b64_images[0].
                preview = f"data:image/png;base64,{b64_images[0]}" if b64_images else None
                if is_doc_as_image:
                    chain_type = UploadChainTypes.DOCUMENT_AS_IMAGE.value
                else:
                    chain_type = UploadChainTypes.SHORT_DOCUMENT.value
            else:
                # Only DOCUMENT_EXTENSIONS can reach this branch thanks to the
                # is_supported check above.
                chain_type = UploadChainTypes.SHORT_DOCUMENT.value
                if extension == "docx":
                    extracted_text = extract_docx_text(file_data)
                else:
                    extracted_text = extract_plain_text(file_data)
            # NOTE(review): `file` was already read by get_file_data above, so
            # this upload presumably relies on the stream having been rewound
            # there - confirm against get_file_data.
            dataiku.Folder(webapp_config.get("upload_folder")).upload_stream(file_path, file)

            media_summary = MediaSummary(
                chain_type=chain_type,
                original_file_name=file.filename,
                file_path=file_path,
                full_extracted_text=extracted_text,
                preview=preview,
                extracted_images_path=extracted_images_path,
            )
            # The metadata JSON is written with the full extracted text; the
            # text is then dropped from the summary returned to the caller.
            metadata_path: str = save_extracted_json(file_path, media_summary)
            media_summary["metadata_path"] = metadata_path
            del media_summary["full_extracted_text"]
            extractions["media_summaries"].append(media_summary)
        except Exception as err:
            # Callers only get the generic message, so keep the root cause in
            # the logs and on the exception chain.
            logger.exception(f"Failed to process uploaded file: {file.filename}")
            raise Exception(UploadFileError.GENERIC_ERROR.value) from err
    return extractions