/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.recipes.nlp.common.doc_extraction;

import com.dataiku.dip.dataflow.exec.filter.FilterDesc;
import com.dataiku.dip.llm.EnrichedLLMStructuredRef;
import com.dataiku.dip.recipes.nlp.common.doc_extraction.UiExtractorSettings;
import com.dataiku.dip.recipes.nlp.rag_embedding.RAGEmbeddingRecipeCreator;
import com.dataiku.dip.utils.DKULogger;
import javax.annotation.Nullable;

/**
 * One document-extraction rule: a {@link FilterDesc}, the {@link ActionToPerform} selected for
 * the rule, and one settings object per kind of action.
 *
 * <p>All state is held in public mutable fields and a no-argument constructor is available.
 * NOTE(review): this looks like a (de)serialized settings bean, so field names and declaration
 * order are presumably part of a persisted format - confirm before renaming or reordering.
 */
public class DocExtractionRule {
    /** Filter attached to this rule; presumably selects the documents the rule applies to. */
    public FilterDesc filter;

    /** Action selected for this rule; {@link ActionToPerform#VLM} unless overridden. */
    public ActionToPerform actionToPerform = ActionToPerform.VLM;

    /** VLM settings; presumably read when the action is {@link ActionToPerform#VLM} - TODO confirm. */
    public UiExtractorSettings.VLM vlmSettings;

    /** Structured settings; presumably read when the action is {@link ActionToPerform#STRUCTURED} - TODO confirm. */
    public UiExtractorSettings.Structured structuredSettings;

    /** Raw settings; presumably read when the action is {@link ActionToPerform#RAW} - TODO confirm. */
    public UiExtractorSettings.Raw rawSettings;

    /** Chunk size / overlap settings; left {@code null} by the no-argument constructor. */
    @Nullable
    public SplittingSettings splittingSettings;

    /** Kind of content associated with the multimodal column; not assigned by any constructor here. */
    public MultimodalContentType storeInMultimodalColumn;

    /** Defaults to {@code false}. */
    public boolean reExtractUnmodifiedDocuments;

    /** Logger shared by all instances. */
    public static final DKULogger logger = DKULogger.getLogger("dku.recipes.nlp.embed_documents.embedDocumentsRule");

    /**
     * Creates a rule with field defaults only: the action is {@link ActionToPerform#VLM},
     * {@code reExtractUnmodifiedDocuments} is {@code false} and every other field is {@code null}.
     */
    public DocExtractionRule() {
    }

    /**
     * Creates a rule for the given action, with an enabled filter holding one condition per
     * supported file extension and with default splitting settings.
     *
     * @param extractor               action stored in {@link #actionToPerform}
     * @param supportedFileExtensions extensions to build one filter condition each from; iterated
     *                                directly, so it must not be {@code null}
     * @param embeddingModelRef       forwarded to {@link SplittingSettings#SplittingSettings}; may
     *                                be {@code null}
     */
    public DocExtractionRule(ActionToPerform extractor, String[] supportedFileExtensions, @Nullable EnrichedLLMStructuredRef embeddingModelRef) {
        FilterDesc extensionFilter = new FilterDesc();
        extensionFilter.enabled = true;
        extensionFilter.uiData = new FilterDesc.FilterUiData();
        // "||" is the UI combination mode; presumably "any condition matches" - TODO confirm.
        extensionFilter.uiData.mode = "||";

        this.actionToPerform = extractor;
        this.filter = extensionFilter;
        for (String extension : supportedFileExtensions) {
            extensionFilter.uiData.conditions.add(extensionCondition(extension));
        }
        this.splittingSettings = new SplittingSettings(embeddingModelRef);
    }

    /** Builds the case-insensitive string-equality UI condition for a single file extension. */
    private static FilterDesc.FilterUiCondition extensionCondition(String extension) {
        FilterDesc.FilterUiCondition condition = new FilterDesc.FilterUiCondition();
        condition.input = "file extension";
        condition.col = "file name";
        condition.operator = FilterDesc.FilterUiOperator.EQUALS_CASE_INSENSITIVE_STRING.getRepr();
        condition.string = extension;
        return condition;
    }

    /**
     * Actions a rule can select.
     * NOTE(review): VLM / STRUCTURED / RAW appear to pair with the {@code vlmSettings} /
     * {@code structuredSettings} / {@code rawSettings} fields of the rule - confirm.
     */
    public enum ActionToPerform {
        DONOTEXTRACT,
        VLM,
        STRUCTURED,
        RAW
    }

    /** Chunk size and chunk overlap, both expressed in characters. */
    public static class SplittingSettings {
        public int chunkSizeCharacters;
        public int chunkOverlapCharacters;

        /**
         * Derives both values through {@link RAGEmbeddingRecipeCreator}: the chunk size from the
         * reference's {@code maxTokensLimit} ({@code null} is passed when {@code ref} is
         * {@code null}), then the overlap from that chunk size.
         *
         * @param ref model reference supplying {@code maxTokensLimit}; may be {@code null}
         */
        public SplittingSettings(@Nullable EnrichedLLMStructuredRef ref) {
            int chunkSize = RAGEmbeddingRecipeCreator.adaptDefaultChunkSizeCharacters(ref == null ? null : ref.maxTokensLimit);
            this.chunkSizeCharacters = chunkSize;
            this.chunkOverlapCharacters = RAGEmbeddingRecipeCreator.adaptDefaultOverlapCharacters(chunkSize);
        }
    }

    /** Possible values of {@link DocExtractionRule#storeInMultimodalColumn}. */
    public enum MultimodalContentType {
        PROMPT_OUTPUT,
        CHUNKED_PROMPT_OUTPUT,
        FULL_CONTENT,
        IMAGES
    }
}

