
from pathlib import Path
from typing import Optional, Union

from common.backend.models.base import MediaSummary, UploadChainTypes, UploadFileError
from common.backend.utils.file_extraction.pptx.pptx_utils import (
    extract_pptx_slides_as_imgs,
    extract_pptx_text,
    save_pptx_slides_next_to_original_document,
)
from common.backend.utils.sql_timing import log_execution_time
from common.backend.utils.upload_utils import save_extracted_json
from common.solutions.chains.summary.doc_as_image_summary_chain import DocAsImageSummaryChain
from common.solutions.chains.summary.text_extraction_summary_chain import TextExtractionSummaryChain


@log_execution_time
def extract_pptx_summary(
    file_path: str, file_data: bytes, original_file_name: str, language: Optional[str]
) -> MediaSummary:
    """Build and persist the media summary of an uploaded PPTX file.

    The slides are extracted as base64 images and saved next to the original
    document, the text is extracted, and one of two summary chains is run:
    ``DocAsImageSummaryChain`` when ``extract_pptx_text`` flags the file as a
    document-as-image, ``TextExtractionSummaryChain`` otherwise. The summary
    is then enriched with the file path, a preview built from the first
    slide image and the extracted-images path, and written out through
    ``save_extracted_json``.

    Args:
        file_path: Path of the uploaded file; forwarded to the slide
            extraction/saving helpers and to ``save_extracted_json``.
        file_data: Raw content of the PPTX file.
        original_file_name: Name the file was uploaded with. Its suffix is
            passed to the text extraction (empty string when the name is
            falsy) and the name itself is forwarded to the summary chain.
        language: Optional language forwarded to the summary chain.

    Returns:
        The summary with ``chain_type``, ``file_path``, ``preview``,
        ``extracted_images_path`` and ``metadata_path`` set. The full
        extracted text is part of the saved JSON but is removed from the
        returned value.

    Raises:
        Exception: Carrying ``UploadFileError.PARSING_ERROR.value`` when no
            slide image could be extracted or when the selected chain
            returns no summary.
    """
    extension = Path(original_file_name).suffix if original_file_name else ""

    b64_images = extract_pptx_slides_as_imgs(file_path, file_data)
    if not b64_images:
        # The preview is built from the first slide image. Without any image
        # the function cannot complete, so fail with the domain parsing error
        # before saving slides or running a summary chain, instead of hitting
        # an IndexError at the very end.
        raise Exception(UploadFileError.PARSING_ERROR.value)

    extracted_images_path = save_pptx_slides_next_to_original_document(b64_images, file_path)
    extracted_text, is_doc_as_image = extract_pptx_text(file_data, extension, len(b64_images))

    media_summary: Union[MediaSummary, None]
    if is_doc_as_image:
        media_summary = DocAsImageSummaryChain(
            file_data, original_file_name, language, file_path, b64_images
        ).get_summary()
    else:
        media_summary = TextExtractionSummaryChain(extracted_text, original_file_name, language).get_summary()
    if media_summary is None:
        raise Exception(UploadFileError.PARSING_ERROR.value)

    if is_doc_as_image:
        chain_type = UploadChainTypes.DOCUMENT_AS_IMAGE
    elif media_summary.get("summary") is None:
        # The text chain produced no "summary" entry: tagged as a long document.
        chain_type = UploadChainTypes.LONG_DOCUMENT
    else:
        chain_type = UploadChainTypes.SHORT_DOCUMENT
    media_summary["chain_type"] = chain_type.value

    media_summary = {
        **media_summary,
        "file_path": file_path,
        "preview": f"data:image/png;base64,{b64_images[0]}",
        "full_extracted_text": extracted_text,
        "extracted_images_path": extracted_images_path,
    }
    # Save first so the JSON on disk keeps the full extracted text; the text is
    # dropped from the returned summary only afterwards.
    metadata_path: str = save_extracted_json(file_path, media_summary)
    media_summary["metadata_path"] = metadata_path
    del media_summary["full_extracted_text"]
    return media_summary
