/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.recipes.nlp.embed_documents;

import com.dataiku.dip.docextraction.common.InputRefs;
import com.dataiku.dip.exceptions.CodedException;
import com.dataiku.dip.exceptions.DKUSecurityException;
import com.dataiku.dip.managedfolder.ManagedFolder;
import com.dataiku.dip.managedfolder.ManagedFolderHandler;
import com.dataiku.dip.recipes.nlp.common.EmbeddingRecipePayloadBaseParams;
import com.dataiku.dip.recipes.nlp.embed_documents.EmbedDocumentsRecordManager;
import com.dataiku.dip.recipes.nlp.embed_documents.EmbedDocumentsRule;
import com.dataiku.dip.recipes.nlp.embed_documents.IndexerComputationResult;
import com.dataiku.dip.recipes.nlp.embed_documents.ProcessedDocumentResources;
import com.dataiku.dip.security.AuthCtx;
import com.dataiku.dip.utils.DKULogger;
import com.dataiku.dip.utils.PathUtils;
import java.io.IOException;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.lang.StringUtils;

/**
 * Classifies the documents of an indexing run against the documents already known to the
 * {@link EmbedDocumentsRecordManager}, and applies the follow-up updates: record manager
 * refresh/cleanup and removal of outdated screenshot directories from a managed folder.
 *
 * <p>Typical call sequence, as suggested by the method names: {@link #startIndexing()}, then
 * {@link #indexDocument} once per input document, then {@link #stopIndexing}.
 */
public class EmbedDocumentsIndexer {
    EmbeddingRecipePayloadBaseParams.VectorStoreUpdateMethod updateMethod;
    EmbedDocumentsRecordManager recordManager;
    static DKULogger logger = DKULogger.getLogger("dku.recipes.nlp.embed_documents_indexer");

    /**
     * @param updateMethod  update method driving the keep/reprocess decision (its {@code isSmart} flag is read)
     * @param recordManager record manager holding the already-known documents and chunks
     */
    public EmbedDocumentsIndexer(EmbeddingRecipePayloadBaseParams.VectorStoreUpdateMethod updateMethod, EmbedDocumentsRecordManager recordManager) {
        this.updateMethod = updateMethod;
        this.recordManager = recordManager;
    }

    /**
     * Starts an indexing run.
     *
     * @return a fresh computation result seeded with the update method and the documents
     *         currently listed by the record manager
     */
    public IndexerComputationResult startIndexing() {
        logger.info("Indexing documents from the input folder with updateMethod=" + this.updateMethod);
        return new IndexerComputationResult(this.updateMethod, this.recordManager.listKnownDocuments());
    }

    /**
     * Classifies one input document and records the classification in {@code indexerResult}.
     *
     * <ul>
     *   <li>{@code IS_NEW}: no known document shares the input document's file path.</li>
     *   <li>{@code IS_KNOWN_TO_KEEP}: the update method is smart, the rule does not force
     *       re-extraction of unmodified documents, and a known document with the same path is
     *       equal to the input document.</li>
     *   <li>{@code IS_KNOWN_TO_REPROCESS}: every other case.</li>
     * </ul>
     *
     * @param inputDocument document to classify
     * @param ruleToApply   rule whose {@code reExtractUnmodifiedDocuments} flag is consulted
     * @param indexerResult run state; read for its known documents and updated with the outcome
     * @return the classification of {@code inputDocument}
     */
    public SingleDocumentIndexerResult indexDocument(InputRefs.ManagedFolderDocumentRefWithMetadata inputDocument, EmbedDocumentsRule ruleToApply, IndexerComputationResult indexerResult) {
        Set<InputRefs.ManagedFolderDocumentRefWithMetadata> knownDocsWithSamePath = indexerResult.knownDocs.stream()
                .filter(doc -> doc.documentRef.filePath.equals(inputDocument.documentRef.filePath))
                .collect(Collectors.toSet());

        SingleDocumentIndexerResult documentIndexerResult;
        if (knownDocsWithSamePath.isEmpty()) {
            documentIndexerResult = SingleDocumentIndexerResult.IS_NEW;
        } else if (this.updateMethod.isSmart
                && !ruleToApply.reExtractUnmodifiedDocuments
                && knownDocsWithSamePath.contains(inputDocument)) {
            // Same path and equal to a known entry: nothing changed, keep what is already indexed.
            documentIndexerResult = SingleDocumentIndexerResult.IS_KNOWN_TO_KEEP;
        } else {
            documentIndexerResult = SingleDocumentIndexerResult.IS_KNOWN_TO_REPROCESS;
        }

        indexerResult.addIndexedDocument(documentIndexerResult, inputDocument);
        return documentIndexerResult;
    }

    /**
     * Finishes an indexing run: refreshes the chunks of the documents to keep, flags duplicated
     * chunks when deduplication is enabled, marks the result as done and logs a summary.
     *
     * @param indexerResult run state produced by {@link #startIndexing()} and filled by {@link #indexDocument}
     */
    public void stopIndexing(IndexerComputationResult indexerResult) {
        this.recordManager.refreshExistingChunksFromDocuments(indexerResult.knownToKeepDocs, indexerResult.indexTime);
        if (indexerResult.shouldDeduplicate) {
            this.recordManager.flagDuplicatedChunksToRemove();
        }
        indexerResult.markAsDone();

        Set<String> pathsToDelete = indexerResult.getDocumentsToDelete().stream()
                .map(doc -> doc.documentRef.filePath)
                .collect(Collectors.toSet());
        Set<String> pathsToProcess = indexerResult.getDocumentsToProcess().stream()
                .map(doc -> doc.documentRef.filePath)
                .collect(Collectors.toSet());
        // "to remove" only counts paths that are deleted without being processed again.
        // Counted with a filter rather than removeAll: Collectors.toSet() does not promise a mutable set.
        long deletedNotReprocessedCount = pathsToDelete.stream()
                .filter(path -> !pathsToProcess.contains(path))
                .count();

        logger.info("Finished indexing documents (new=" + indexerResult.newDocs.size() + ", to reprocess=" + indexerResult.knownToReprocessDocs.size() + ", to skip=" + indexerResult.knownToKeepDocs.size() + ", to remove=" + deletedNotReprocessedCount + ")");
    }

    /**
     * Lists the ids of the chunks to delete: the existing chunks of the documents to delete and,
     * when deduplication is enabled, the duplicated chunks reported by the record manager.
     *
     * <p>NOTE(review): the duplicated ids are added directly to the set returned by the record
     * manager; this assumes that set is mutable and not shared - confirm.
     *
     * @param indexerResult run state providing the documents to delete and the index time
     * @return the chunk ids to delete
     */
    public Set<String> getChunksIdsToDelete(IndexerComputationResult indexerResult) {
        logger.info("Listing outdated chunks ids to delete from KB");
        Set<String> chunksToDelete = this.recordManager.listExistingChunksFromDocuments(indexerResult.getDocumentsToDelete(), indexerResult.indexTime);
        if (indexerResult.shouldDeduplicate) {
            chunksToDelete.addAll(this.recordManager.listDuplicatedChunksToDelete());
        }
        return chunksToDelete;
    }

    /**
     * Deletes the outdated screenshot directories reported by the record manager from the given
     * managed folder, then walks up from each deleted directory and deletes every parent that
     * has become empty.
     *
     * <p>A {@code CodedException}, {@code DKUSecurityException} or {@code IOException} raised
     * for one directory is logged and that directory is skipped; the others are still handled.
     *
     * @param documentsToDelete     documents whose storage directories may be outdated
     * @param indexingTime          index time passed to the record manager lookup
     * @param managedFolderIdFromKB managed folder from which the directories are deleted
     * @param authCtx               authentication context used to build the folder handler
     * @throws Exception if listing the directories, building or closing the handler fails
     */
    private void cleanOutdatedScreenshots(Set<InputRefs.ManagedFolderDocumentRefWithMetadata> documentsToDelete, long indexingTime, ManagedFolder managedFolderIdFromKB, AuthCtx authCtx) throws Exception {
        Set<String> outdatedScreenshotsDirs = this.recordManager.listOutdatedStorageDirs(documentsToDelete, indexingTime);
        if (outdatedScreenshotsDirs.isEmpty()) {
            logger.info("No outdated screenshots directories found, skipping output managed folder cleaning");
            return;
        }

        logger.info("Found outdated screenshots directories to clean from output managed folder:" + outdatedScreenshotsDirs);
        logger.info("Start cleaning outdated screenshots paths from output managed folder");
        try (ManagedFolderHandler outputFolderHandler = (ManagedFolderHandler) managedFolderIdFromKB.buildHandler(authCtx)) {
            for (String dir : outdatedScreenshotsDirs) {
                try {
                    logger.info("Deleting outdated screenshot directory: " + dir);
                    outputFolderHandler.deleteDirectory(dir);
                    // Prune ancestors for as long as they are left empty by the deletion.
                    String parentPath = PathUtils.getParent(dir);
                    while (!StringUtils.isBlank(parentPath) && outputFolderHandler.browseDirectory(parentPath).children.isEmpty()) {
                        logger.info("Also deleting its parent dir which is empty " + parentPath);
                        outputFolderHandler.deleteDirectory(parentPath);
                        parentPath = PathUtils.getParent(parentPath);
                    }
                } catch (CodedException | DKUSecurityException | IOException e) {
                    // Best effort: a failure on one directory must not prevent cleaning the others.
                    logger.error("Error deleting outdated screenshot directory: " + dir + " - skipping", e);
                }
            }
        }
        logger.info("Done cleaning outdated screenshots paths from disk");
    }

    /**
     * Applies the outcome of an indexing run, in this order: ensures the record manager is
     * initialized, registers the newly processed chunks, cleans the outdated screenshot
     * directories, deletes the existing chunks of the documents to delete and, when
     * deduplication is enabled, removes duplicated chunks.
     *
     * <p>NOTE(review): the screenshot cleanup runs before the chunk deletion, presumably because
     * listing outdated directories relies on records that the deletion removes - confirm
     * before reordering.
     *
     * @param indexerResult                 run state providing the documents to delete, the index time and the deduplication flag
     * @param processedChunksIdsAccumulator resources of the documents processed during this run
     * @param managedFolderIdFromKB         managed folder holding the screenshot directories
     * @param authCtx                       authentication context used to access the managed folder
     * @param runID                         identifier of the run, forwarded to the record manager
     * @throws Exception if any record manager operation or the screenshot cleanup fails
     */
    public void updateRecordManagerAndCleanOutdatedScreenshots(IndexerComputationResult indexerResult, List<ProcessedDocumentResources> processedChunksIdsAccumulator, ManagedFolder managedFolderIdFromKB, AuthCtx authCtx, String runID) throws Exception {
        Set<InputRefs.ManagedFolderDocumentRefWithMetadata> documentsToDelete = indexerResult.getDocumentsToDelete();
        this.recordManager.ensureInitialized();
        this.recordManager.addNewChunksFromDocuments(processedChunksIdsAccumulator, indexerResult.indexTime, runID);
        this.cleanOutdatedScreenshots(documentsToDelete, indexerResult.indexTime, managedFolderIdFromKB, authCtx);
        this.recordManager.deleteExistingChunksFromDocuments(documentsToDelete, indexerResult.indexTime);
        if (indexerResult.shouldDeduplicate) {
            this.recordManager.removeDuplicatedChunks();
        }
    }

    /** Classification assigned to a single input document by {@link #indexDocument}. */
    public enum SingleDocumentIndexerResult {
        /** No known document has the same file path. */
        IS_NEW,
        /** A known document has the same file path, but the document must be processed again. */
        IS_KNOWN_TO_REPROCESS,
        /** A known document is equal to the input document and can be kept as is. */
        IS_KNOWN_TO_KEEP
    }
}

