/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.recipes.nlp.embed_documents;

import com.dataiku.dip.docextraction.common.InputRefs;
import com.dataiku.dip.recipes.nlp.embed_documents.ExtractedData;
import com.dataiku.dip.recipes.nlp.embed_documents.ProcessedDocumentResources;
import com.dataiku.dip.util.AutoDelete;
import com.dataiku.dip.utils.DKULogger;
import com.dataiku.dss.shadelib.org.apache.commons.io.FileUtils;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;

/**
 * Dumps the chunks extracted from documents into three files located under an output directory:
 * <ul>
 *   <li>{@code to_add_chunks_content.jsonl} - one line per chunk, the string form of {@code chunk.jsonify(...)};</li>
 *   <li>{@code to_add_chunks_uuids.csv} - one line per chunk, holding {@code chunk.uuid};</li>
 *   <li>{@code to_delete_chunks_uuids.csv} - the uuids handed to {@link #dumpOutdatedChunksUuids(Set)}.</li>
 * </ul>
 * It also keeps, in {@link #processedDocumentResourcesAccumulator}, one {@link ProcessedDocumentResources}
 * entry per document for which at least one chunk was written.
 */
public class ExtractedDataWriter {
    static DKULogger logger = DKULogger.getLogger("dku.recipes.nlp.embed_documents_writer");

    /** Receives one JSON line per chunk to add. */
    File toAddChunksContentFile;
    /** Receives one uuid line per chunk to add, in the same order as the content file. */
    File toAddChunksUuidsFile;
    /** Receives the uuids passed to {@link #dumpOutdatedChunksUuids(Set)}. */
    File toRemoveChunksUuidsFile;
    /** One entry per document whose chunks were written by {@link #append}. */
    List<ProcessedDocumentResources> processedDocumentResourcesAccumulator = new ArrayList<>();

    /**
     * Resolves the three output files under {@code outputDir} and appends an empty string to each of them
     * (append mode, "utf8" charset).
     * NOTE(review): presumably this creates missing files without truncating existing ones - confirm against
     * the shaded commons-io version.
     *
     * @param outputDir directory (used as a {@link File}) under which the three files are resolved
     * @throws IOException if one of the initial writes fails; the remaining files are then not touched
     */
    public ExtractedDataWriter(AutoDelete outputDir) throws IOException {
        File baseDir = (File) outputDir;
        this.toAddChunksContentFile = new File(baseDir, "to_add_chunks_content.jsonl");
        this.toAddChunksUuidsFile = new File(baseDir, "to_add_chunks_uuids.csv");
        this.toRemoveChunksUuidsFile = new File(baseDir, "to_delete_chunks_uuids.csv");

        List<File> outputs = List.of(this.toAddChunksContentFile, this.toAddChunksUuidsFile, this.toRemoveChunksUuidsFile);
        for (File output : outputs) {
            FileUtils.write(output, "", "utf8", true);
        }
    }

    /**
     * Appends every chunk of {@code extractedData} to the content and uuids files, then records the document
     * in {@link #processedDocumentResourcesAccumulator}. Does nothing at all when there is no chunk.
     * <p>
     * Side effect on the argument: once the chunks are written, {@code document.userMetadata} and
     * {@code document.securityToken} are set to {@code null} before the document is stored in the accumulator.
     * NOTE(review): presumably done to avoid retaining that data in memory - confirm.
     *
     * @param document      document the chunks come from; its security token and user metadata are passed
     *                      to {@code chunk.jsonify}
     * @param extractedData chunks and associated storage path for this document
     * @throws IOException      if an output file cannot be opened, or if closing a writer fails
     * @throws RuntimeException wrapping the {@link IOException} raised while writing a chunk
     */
    synchronized void append(InputRefs.ManagedFolderDocumentRefWithMetadata document, ExtractedData extractedData) throws Exception {
        if (extractedData.chunks.isEmpty()) {
            return;
        }

        String progress = "Dumping extracted data (" + extractedData.chunks.size() + " chunks) to disk";
        // Cast kept so that the argument is statically typed as Object, as in the original call.
        logger.info((Object) this.buildMessageLogForDocument(document.documentRef, progress));

        try (BufferedWriter contentOut = openUtf8Appender(this.toAddChunksContentFile);
             BufferedWriter uuidOut = openUtf8Appender(this.toAddChunksUuidsFile)) {
            for (var chunk : extractedData.chunks) {
                try {
                    contentOut.write(chunk.jsonify(document.securityToken, document.userMetadata).toString());
                    contentOut.newLine();
                    uuidOut.write(chunk.uuid);
                    uuidOut.newLine();
                } catch (IOException writeFailure) {
                    // Write failures are deliberately surfaced unchecked, wrapped in a RuntimeException.
                    throw new RuntimeException(writeFailure);
                }
            }
        }

        document.userMetadata = null;
        document.securityToken = null;

        var writtenUuids = extractedData.chunks.stream().map(c -> c.uuid).toList();
        this.processedDocumentResourcesAccumulator.add(
                new ProcessedDocumentResources(document, extractedData.associatedStoragePath, writtenUuids));
    }

    /**
     * Writes the given uuids to the to-delete file via {@code FileUtils.writeLines}, using the "utf8" charset,
     * {@code "\n"} as line ending and append mode disabled. Unlike {@link #append}, this method is not
     * synchronized.
     *
     * @param chunksIdsToDelete uuids of the chunks to remove
     * @throws IOException if the file cannot be written
     */
    void dumpOutdatedChunksUuids(Set<String> chunksIdsToDelete) throws IOException {
        final boolean appendToExisting = false;
        FileUtils.writeLines(this.toRemoveChunksUuidsFile, "utf8", chunksIdsToDelete, "\n", appendToExisting);
    }

    /**
     * Prefixes a log message with the file path of the document it relates to.
     *
     * @param documentRef reference whose {@code filePath} is used as prefix
     * @param message     text of the log message
     * @return the string {@code "[<filePath>] - <message>"}
     */
    public String buildMessageLogForDocument(InputRefs.ManagedFolderDocumentRef documentRef, String message) {
        return "[%s] - %s".formatted(documentRef.filePath, message);
    }

    /** Opens {@code target} for appending and wraps it in a buffered UTF-8 writer. */
    private static BufferedWriter openUtf8Appender(File target) throws IOException {
        FileOutputStream rawOut = new FileOutputStream(target, true);
        return new BufferedWriter(new OutputStreamWriter(rawOut, StandardCharsets.UTF_8));
    }
}

