/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.recipes.nlp.embed_documents;

import com.dataiku.dip.ApplicationConfigurator;
import com.dataiku.dip.dao.GeneralSettingsDAO;
import com.dataiku.dip.docextraction.StructuredExtractor;
import com.dataiku.dip.llm.EnrichedLLMStructuredRef;
import com.dataiku.dip.recipes.RecipeParams;
import com.dataiku.dip.recipes.nlp.embed_documents.EmbedDocumentsRule;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import javax.annotation.Nullable;

/**
 * Parameters of the "embed documents" recipe.
 *
 * <p>Holds the selected {@link ExtractionMode}, the default VLM id and image handling mode used to
 * build managed rules, the user-defined rules (used in {@link ExtractionMode#CUSTOM_RULES} mode) and
 * the catch-all rule applied to every file not matched by another rule.
 */
public class EmbedDocumentsRecipeParams extends RecipeParams {
    /** Sentinel VLM id that is treated exactly like "no VLM selected" ({@code null}). */
    public static final String DSS_NO_VLM_SELECTION = "DSS_NO_SELECTION";

    /** Currently selected extraction mode. */
    public ExtractionMode extractionMode = ExtractionMode.CUSTOM_RULES;

    /**
     * Short list of file extensions per managed extraction mode.
     *
     * <p>The map itself is immutable but the array values are not: never modify them in place.
     */
    public static final Map<ExtractionMode, String[]> EXTRACTION_EXTENSION_SHORTLIST = Map.ofEntries(
            Map.entry(ExtractionMode.MANAGED_TEXT_ONLY, new String[]{"html", "txt", "md"}),
            Map.entry(ExtractionMode.MANAGED_VISUAL_ONLY, new String[]{"pdf", "docx", "pptx", "jpg", "png"}));

    /**
     * Long list of file extensions per managed extraction mode.
     *
     * <p>The map itself is immutable but the array values are not: never modify them in place.
     */
    public static final Map<ExtractionMode, String[]> EXTRACTION_EXTENSION_LONGLIST = Map.ofEntries(
            Map.entry(ExtractionMode.MANAGED_TEXT_ONLY, new String[]{"pdf", "docx", "pptx", "html", "txt", "md"}),
            Map.entry(ExtractionMode.MANAGED_VISUAL_ONLY, new String[]{"pdf", "odp", "ppt", "odt", "doc", "docx", "xls", "xlsx", "xlsb", "xlsm", "ods", "pptx", "png", "jpeg", "jpg"}));

    private String defaultVlmId;
    private StructuredExtractor.ImageHandlingMode defaultImageHandlingMode = StructuredExtractor.ImageHandlingMode.IGNORE;
    private ArrayList<EmbedDocumentsRule> rules = new ArrayList<>();
    private EmbedDocumentsRule allOtherRule;

    /**
     * Builds the default catch-all rule.
     *
     * <p>The rule is derived from the first default {@link ExtractionMode#MANAGED_TEXT_ONLY} rule, with
     * its filter cleared and its action set to
     * {@link EmbedDocumentsRule.ActionToPerform#DONOTEXTRACT}.
     *
     * @param embeddingModelRef embedding model reference forwarded to the rule, may be {@code null}
     * @param defaultVlmId default VLM id forwarded to the rule factory, may be {@code null}
     * @param defaultImageHandlingMode image handling mode stored in the rule's structured settings,
     *     may be {@code null}
     * @return a new catch-all rule
     */
    public static EmbedDocumentsRule getDefaultAllOtherRule(@Nullable EnrichedLLMStructuredRef embeddingModelRef, @Nullable String defaultVlmId, @Nullable StructuredExtractor.ImageHandlingMode defaultImageHandlingMode) {
        EmbedDocumentsRule otherRule = EmbedDocumentsRecipeParams.getDefaultRulesFrom(ExtractionMode.MANAGED_TEXT_ONLY, true, embeddingModelRef, defaultVlmId, defaultImageHandlingMode).get(0);
        otherRule.filter = null;
        otherRule.actionToPerform = EmbedDocumentsRule.ActionToPerform.DONOTEXTRACT;
        return otherRule;
    }

    /**
     * Builds the default rules of the given extraction mode.
     *
     * <ul>
     *   <li>{@link ExtractionMode#MANAGED_TEXT_ONLY}: a single structured-extraction rule over the
     *       long list of text extensions (the {@code shortList} flag does not affect this list).</li>
     *   <li>{@link ExtractionMode#MANAGED_VISUAL_ONLY}: a VLM rule over the short or long list of
     *       visual extensions (depending on {@code shortList}), followed by a structured-extraction
     *       rule over the short list of text extensions.</li>
     *   <li>{@link ExtractionMode#CUSTOM_RULES}: an empty list.</li>
     * </ul>
     *
     * <p>Every returned rule receives its own copy of the extension array, so the shared
     * {@code EXTRACTION_EXTENSION_*} constants are never aliased by a rule.
     *
     * @param extractionMode mode to build the rules for
     * @param shortList whether the VLM rule uses the short list of visual extensions
     * @param embeddingModelRef embedding model reference forwarded to the rules, may be {@code null}
     * @param defaultVlmId VLM id for the VLM rule; {@code null} and {@link #DSS_NO_VLM_SELECTION}
     *     both mean "no VLM"
     * @param defaultImageHandlingMode image handling mode of the text-only structured rule, may be
     *     {@code null}
     * @return a new mutable list of rules
     * @throws IllegalArgumentException if {@code extractionMode} is {@code null} or not supported
     */
    public static List<EmbedDocumentsRule> getDefaultRulesFrom(ExtractionMode extractionMode, boolean shortList, @Nullable EnrichedLLMStructuredRef embeddingModelRef, @Nullable String defaultVlmId, @Nullable StructuredExtractor.ImageHandlingMode defaultImageHandlingMode) {
        if (extractionMode == ExtractionMode.MANAGED_TEXT_ONLY) {
            EmbedDocumentsRule structuredRule = newStructuredRule(EXTRACTION_EXTENSION_LONGLIST.get(ExtractionMode.MANAGED_TEXT_ONLY), embeddingModelRef);
            structuredRule.structuredSettings.imageHandlingMode = defaultImageHandlingMode;
            if (defaultImageHandlingMode == StructuredExtractor.ImageHandlingMode.OCR) {
                structuredRule.structuredSettings.ocrEngine = StructuredExtractor.OCRSettings.OCREngine.AUTO;
            }
            List<EmbedDocumentsRule> textExtractionRules = new ArrayList<>();
            textExtractionRules.add(structuredRule);
            return textExtractionRules;
        }
        if (extractionMode == ExtractionMode.MANAGED_VISUAL_ONLY) {
            Map<ExtractionMode, String[]> vlmExtensionSource = shortList ? EXTRACTION_EXTENSION_SHORTLIST : EXTRACTION_EXTENSION_LONGLIST;
            // The sentinel id is normalized to null: both mean that no VLM is selected.
            String effectiveVlmId = DSS_NO_VLM_SELECTION.equals(defaultVlmId) ? null : defaultVlmId;
            List<EmbedDocumentsRule> visualExtractionRules = new ArrayList<>();
            visualExtractionRules.add(newVlmRule(vlmExtensionSource.get(ExtractionMode.MANAGED_VISUAL_ONLY), effectiveVlmId, embeddingModelRef));
            // The companion structured rule always uses the short list of text extensions and keeps
            // the default image handling of its freshly created settings.
            visualExtractionRules.add(newStructuredRule(EXTRACTION_EXTENSION_SHORTLIST.get(ExtractionMode.MANAGED_TEXT_ONLY), embeddingModelRef));
            return visualExtractionRules;
        }
        if (extractionMode == ExtractionMode.CUSTOM_RULES) {
            return new ArrayList<>();
        }
        throw new IllegalArgumentException("Unknown extraction mode " + extractionMode);
    }

    /** Creates a structured-extraction rule with fresh settings, storing the full content. */
    private static EmbedDocumentsRule newStructuredRule(String[] extensions, @Nullable EnrichedLLMStructuredRef embeddingModelRef) {
        // Clone so that the rule never shares its array with the public extension constants.
        EmbedDocumentsRule structuredRule = new EmbedDocumentsRule(EmbedDocumentsRule.ActionToPerform.STRUCTURED, extensions.clone(), embeddingModelRef);
        structuredRule.structuredSettings = new EmbedDocumentsRule.UIStructuredExtractorSettings();
        structuredRule.storeInMultimodalColumn = EmbedDocumentsRule.MultimodalContentType.FULL_CONTENT;
        return structuredRule;
    }

    /** Creates a VLM-extraction rule for the given VLM id, storing images. */
    private static EmbedDocumentsRule newVlmRule(String[] extensions, @Nullable String vlmId, @Nullable EnrichedLLMStructuredRef embeddingModelRef) {
        // Clone so that the rule never shares its array with the public extension constants.
        EmbedDocumentsRule vlmRule = new EmbedDocumentsRule(EmbedDocumentsRule.ActionToPerform.VLM, extensions.clone(), embeddingModelRef);
        vlmRule.vlmSettings = new EmbedDocumentsRule.UiVLMExtractorSettings(vlmId, embeddingModelRef);
        vlmRule.storeInMultimodalColumn = EmbedDocumentsRule.MultimodalContentType.IMAGES;
        return vlmRule;
    }

    /**
     * Resets these parameters to their defaults.
     *
     * <p>The default VLM id is {@code creationVLMId} when it is not {@code null}; otherwise it is
     * read from the instance-level embed documents recipe settings, where a missing value or
     * {@link #DSS_NO_VLM_SELECTION} yields {@code null}. The extraction mode becomes
     * {@link ExtractionMode#MANAGED_VISUAL_ONLY} when a default VLM id is set and
     * {@link ExtractionMode#MANAGED_TEXT_ONLY} otherwise. Custom rules are cleared and the catch-all
     * rule is rebuilt.
     *
     * @param embeddingModelRef embedding model reference forwarded to the catch-all rule
     * @param creationVLMId VLM id chosen at creation time, or {@code null} to use the general settings
     */
    public void initializeWithDefaultRules(EnrichedLLMStructuredRef embeddingModelRef, String creationVLMId) {
        if (creationVLMId != null) {
            // NOTE(review): the sentinel DSS_NO_VLM_SELECTION is not filtered here, so passing it
            // still selects MANAGED_VISUAL_ONLY below - confirm this is intended.
            this.defaultVlmId = creationVLMId;
        } else {
            GeneralSettingsDAO.EmbedDocumentsRecipeSettings embedDocumentsRecipeSettings = ApplicationConfigurator.getGeneralSettingsUnsafeAutoTXN().generativeAISettings.embedDocumentsRecipeSettings;
            boolean hasConfiguredVlm = embedDocumentsRecipeSettings != null
                    && embedDocumentsRecipeSettings.defaultVLMId != null
                    && !embedDocumentsRecipeSettings.defaultVLMId.equals(DSS_NO_VLM_SELECTION);
            this.defaultVlmId = hasConfiguredVlm ? embedDocumentsRecipeSettings.defaultVLMId : null;
        }
        this.extractionMode = this.defaultVlmId != null ? ExtractionMode.MANAGED_VISUAL_ONLY : ExtractionMode.MANAGED_TEXT_ONLY;
        this.rules = new ArrayList<>();
        this.allOtherRule = EmbedDocumentsRecipeParams.getDefaultAllOtherRule(embeddingModelRef, this.defaultVlmId, this.defaultImageHandlingMode);
    }

    /**
     * @return the default VLM id, possibly {@code null}
     */
    public String getDefaultVLMSelection() {
        return this.defaultVlmId;
    }

    /**
     * @param defaultVlmId the new default VLM id, stored as-is
     */
    public void setDefaultVLMSelection(String defaultVlmId) {
        this.defaultVlmId = defaultVlmId;
    }

    /**
     * Returns the effective extraction rules, excluding the catch-all rule.
     *
     * <p>In {@link ExtractionMode#CUSTOM_RULES} mode this is the internal (live) list of custom rules;
     * in managed modes a new list of default rules is built on each call, using the long extension
     * lists.
     *
     * @param embeddingModelRef embedding model reference forwarded to generated rules
     * @return the effective rules
     * @throws IllegalArgumentException if {@link #extractionMode} is {@code null}
     */
    public List<EmbedDocumentsRule> getRules(EnrichedLLMStructuredRef embeddingModelRef) {
        // Enum identity comparison: a null mode falls through to the factory, which reports it with a
        // descriptive exception.
        if (this.extractionMode == ExtractionMode.CUSTOM_RULES) {
            return this.rules;
        }
        return EmbedDocumentsRecipeParams.getDefaultRulesFrom(this.extractionMode, false, embeddingModelRef, this.defaultVlmId, this.defaultImageHandlingMode);
    }

    /**
     * Returns a copy of the stored custom rules followed by the catch-all rule.
     *
     * <p>The stored rules are used regardless of the extraction mode. The last element is
     * {@code null} when no catch-all rule has been set.
     *
     * @return a new list; modifying it does not affect these parameters
     */
    public List<EmbedDocumentsRule> getAllRules() {
        List<EmbedDocumentsRule> allRules = new ArrayList<>(this.rules);
        allRules.add(this.allOtherRule);
        return allRules;
    }

    /**
     * @return the catch-all rule, or {@code null} when none has been set
     */
    public EmbedDocumentsRule getAllOtherFilesRule() {
        return this.allOtherRule;
    }

    /** How documents are turned into content to embed. */
    public enum ExtractionMode {
        /** Managed rules: default rule set with a single structured-extraction rule. */
        MANAGED_TEXT_ONLY,
        /** Managed rules: default rule set with a VLM rule followed by a structured-extraction rule. */
        MANAGED_VISUAL_ONLY,
        /** Rules stored in these parameters are used as-is. */
        CUSTOM_RULES
    }
}

