/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.docextraction;

import com.dataiku.dip.docextraction.common.InputRefs;
import com.dataiku.dip.recipes.nlp.rag_embedding.RAGEmbeddingRecipeCreator;
import com.dataiku.dss.shadelib.com.google.common.base.Strings;
import javax.annotation.Nullable;

/**
 * Prompt constants, prompt-building helpers and plain request/settings holders
 * used for VLM-based document extraction.
 *
 * <p>This is a static utility class and cannot be instantiated.
 */
public final class VLMExtractor {
    /** Token inside a prompt template that gets replaced by the character limit. */
    public static final String PROMPT_LIMIT_PLACEHOLDER = "__DKU_NUMBER_OF_CHARS__";

    /**
     * Summary prompt template. It embeds {@link #PROMPT_LIMIT_PLACEHOLDER} where the
     * maximum number of characters must be substituted before the prompt is used.
     */
    // Reference the constant rather than repeating the literal, so the template and
    // the replacement performed in getExtractionPromptFromCharsLimit cannot drift apart.
    public static final String VLM_SUMMARY_EXTRACTION_PROMPT_TEMPLATE = String.format("Generate a concise summary, up to %s characters, derived from the screenshot(s) of document page(s) provided.\nBegin with a brief overview and highlight crucial words, facts, or concepts to enhance both semantic and keyword searchability.\nOmit any references to the original source.\n", PROMPT_LIMIT_PLACEHOLDER);

    /** Full-text extraction prompt. It contains no placeholder and is used as is. */
    public static final String VLM_FULL_EXTRACTION_PROMPT = "Extract the information from the screenshot(s) of document page(s) provided at the end, maintaining the original text without alteration. Follow these guidelines:\n* Use Markdown to format the text, including headers such as Title, Subtitle, and Main Sections, as well as any tables present in the document.\n* Describe any images and charts within the document screenshot(s), as the visual content cannot be directly extracted.\n* Exclude any hyperlinks, as they cannot be extracted from an image.\n* Ensure the text content remains unchanged.\n* Extract all text, including any footers or reference lists.\n";

    /**
     * Always throws: this class only exposes static members.
     *
     * @throws IllegalStateException unconditionally
     */
    private VLMExtractor() {
        throw new IllegalStateException("Utility class");
    }

    /**
     * Builds a prompt from a template, given a limit expressed in tokens.
     *
     * <p>The character limit is obtained from
     * {@code RAGEmbeddingRecipeCreator.adaptDefaultChunkSizeCharacters(maxTokensLimit)}
     * and then substituted by {@link #getExtractionPromptFromCharsLimit(String, int)}.
     * NOTE(review): how a {@code null} limit is handled depends on that helper, which is
     * not visible from this class - confirm against its implementation.
     *
     * @param extractionPromptTemplate template that may contain {@link #PROMPT_LIMIT_PLACEHOLDER}
     * @param maxTokensLimit token limit, possibly {@code null}
     * @return the template with every placeholder occurrence replaced, or the template
     *         itself when it is {@code null} or empty
     */
    public static String getExtractionPromptFromTokensLimit(String extractionPromptTemplate, @Nullable Integer maxTokensLimit) {
        return getExtractionPromptFromCharsLimit(extractionPromptTemplate, RAGEmbeddingRecipeCreator.adaptDefaultChunkSizeCharacters(maxTokensLimit));
    }

    /**
     * Replaces every occurrence of {@link #PROMPT_LIMIT_PLACEHOLDER} in the template
     * with the decimal representation of {@code charsLimit}.
     *
     * @param extractionPromptTemplate template to fill in; may be {@code null} or empty
     * @param charsLimit character limit to insert
     * @return the filled-in prompt; a {@code null} or empty template is returned unchanged
     */
    public static String getExtractionPromptFromCharsLimit(String extractionPromptTemplate, int charsLimit) {
        if (Strings.isNullOrEmpty(extractionPromptTemplate)) {
            return extractionPromptTemplate;
        }
        return extractionPromptTemplate.replace(PROMPT_LIMIT_PLACEHOLDER, String.valueOf(charsLimit));
    }

    /**
     * Builds the summary prompt from {@link #VLM_SUMMARY_EXTRACTION_PROMPT_TEMPLATE}.
     *
     * @param maxTokensLimit token limit, possibly {@code null}
     * @return the summary prompt with the placeholder replaced
     * @see #getExtractionPromptFromTokensLimit(String, Integer)
     */
    public static String getSummaryExtractionPrompt(@Nullable Integer maxTokensLimit) {
        return getExtractionPromptFromTokensLimit(VLM_SUMMARY_EXTRACTION_PROMPT_TEMPLATE, maxTokensLimit);
    }

    /**
     * Mutable settings holder for an extraction request.
     *
     * <p>NOTE(review): the unit of {@code windowSize} / {@code windowOverlap}
     * (presumably a number of pages) is not established by this class - confirm with callers.
     */
    public static class VLMExtractorSettings {
        public int windowSize;
        public int windowOverlap;
        /** Identifier of the LLM to use. */
        public String llmId;
        /** Prompt sent to the LLM. */
        public String llmPrompt;
        /** Defaults to {@code false}. */
        public boolean aggregateResults = false;

        /**
         * Returns a human-readable dump of every setting, including
         * {@code aggregateResults}, which was previously left out.
         */
        @Override
        public String toString() {
            return "windowSize=" + this.windowSize
                    + ", windowOverlap=" + this.windowOverlap
                    + ", llmId=" + this.llmId
                    + ", llmPrompt='" + this.llmPrompt + "'"
                    + ", aggregateResults=" + this.aggregateResults;
        }
    }

    /** Inputs of an extraction request. */
    public static class VLMInputs {
        /** Reference to the images to process. */
        public InputRefs.ImagesRef imagesRef;
    }

    /** Extraction request: inputs plus settings. Both are non-null after default construction. */
    public static class VLMExtractorRequest {
        public VLMInputs inputs = new VLMInputs();
        public VLMExtractorSettings settings = new VLMExtractorSettings();

        /** Creates a request with empty inputs and default settings. */
        public VLMExtractorRequest() {
        }

        /**
         * Creates a request for the given images and settings.
         *
         * @param imagesRef stored into {@code inputs.imagesRef}
         * @param settings replaces the default settings instance; stored as given,
         *        so passing {@code null} leaves {@code settings} null
         */
        public VLMExtractorRequest(InputRefs.ImagesRef imagesRef, VLMExtractorSettings settings) {
            this.inputs.imagesRef = imagesRef;
            this.settings = settings;
        }
    }
}

