/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.recipes.nlp.common.doc_extraction;

import com.dataiku.dip.ApplicationConfigurator;
import com.dataiku.dip.connections.AbstractSQLConnection;
import com.dataiku.dip.connections.SQLConnectionProvider;
import com.dataiku.dip.connections.SQLiteConnection;
import com.dataiku.dip.docextraction.common.InputRefs;
import com.dataiku.dip.exceptions.DKUSecurityException;
import com.dataiku.dip.fs.FSPath;
import com.dataiku.dip.recipes.nlp.common.doc_extraction.ProcessedDocumentResources;
import com.dataiku.dip.security.AuthCtx;
import com.dataiku.dip.security.DSSAuthCtx;
import com.dataiku.dip.sql.SQLUtils;
import com.dataiku.dip.utils.DKULogger;
import com.dataiku.dip.utils.PathUtils;
import java.io.File;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

public class DocExtractionRecordManager {
    public static final String RECORD_MANAGER_FILENAME = "dss_record_manager_cache.sqlite";
    public String recordManagerPath;
    public SQLConnectionProvider.SQLConnectionData connectionData;
    public String projectKey;
    private static final Map<String, String> TABLE_SCHEMA = Map.of("documentPath", "TEXT", "documentModifiedAt", "BIGINT", "documentSizeMB", "BIGINT", "chunkUUID", "TEXT", "storagePath", "TEXT", "recordLastUpdated", "BIGINT", "runId", "TEXT", "securityTokensHash", "TEXT", "userMetadataHash", "TEXT", "outputPartitionId", "TEXT");
    private final int QUERY_TIMEOUT = this.setQueryTimeout();
    private final int BATCH_SIZE = this.setBatchSize();
    protected static DKULogger logger = DKULogger.getLogger((String)"dku.recipes.nlp.doc_extraction_record_manager");

    public DocExtractionRecordManager(File recordManagerFolder, String projectKey) {
        logger.info((Object)("Initializing EmbedDocumentsRecordManager at " + recordManagerFolder.getAbsolutePath()));
        this.recordManagerPath = PathUtils.concatLNT((String[])new String[]{recordManagerFolder.getAbsolutePath(), RECORD_MANAGER_FILENAME});
        this.projectKey = projectKey;
        SQLiteConnection sqLiteConnection = new SQLiteConnection();
        sqLiteConnection.params.filepath = this.recordManagerPath;
        sqLiteConnection.params.properties.add(new AbstractSQLConnection.CustomDatabaseProperty("USER", "sqlite_admin", true));
        sqLiteConnection.name = "internal-sqlite-connection-for-record-manager";
        try {
            this.connectionData = sqLiteConnection.getConnectionData_NT(DSSAuthCtx.newNone(), projectKey);
        }
        catch (DKUSecurityException | SQLException e) {
            throw new RuntimeException(e);
        }
        this.ensureInitialized();
    }

    public DocExtractionRecordManager() {
    }

    public void createRecordManagerTable() {
        SQLConnectionProvider.SQLConnectionWrapper connection = this.getConnection();
        try (Statement statement = connection.createStatement();){
            statement.setQueryTimeout(this.QUERY_TIMEOUT);
            String columnsDefinition = TABLE_SCHEMA.entrySet().stream().map(entry -> (String)entry.getKey() + " " + (String)entry.getValue()).collect(Collectors.joining(", "));
            statement.executeUpdate("CREATE TABLE IF NOT EXISTS upsertion_record (" + columnsDefinition + ")");
            statement.executeUpdate("CREATE INDEX IF NOT EXISTS upsertion_record_docs ON upsertion_record(documentPath, documentModifiedAt, documentSizeMB, recordLastUpdated)");
            connection.commit();
            connection.close();
            logger.info((Object)"Successfully created record manager table (upsertion_record)");
        }
        catch (SQLException e) {
            logger.error((Object)"Error creating record manager table", (Throwable)e);
            throw new RuntimeException(e);
        }
    }

    public void updateTableSchemaIfNeeded() {
        logger.info((Object)"Checking compatibility of record manager table schema.");
        HashSet<String> retrievedColumnNames = new HashSet<String>();
        String query = "SELECT name FROM pragma_table_info('upsertion_record')";
        SQLConnectionProvider.SQLConnectionWrapper connection = this.getConnection();
        try (PreparedStatement statement = connection.prepareStatement(query);){
            statement.setQueryTimeout(this.QUERY_TIMEOUT);
            ResultSet rs2 = statement.executeQuery();
            while (rs2.next()) {
                retrievedColumnNames.add(rs2.getString("name"));
            }
        }
        catch (SQLException e) {
            throw new RuntimeException(e);
        }
        for (Map.Entry<String, String> columnSchema : TABLE_SCHEMA.entrySet()) {
            if (retrievedColumnNames.contains(columnSchema.getKey())) continue;
            logger.info((Object)("Updating record manager table schema to include column: " + columnSchema.getKey()));
            String addMissingColumnQuery = "ALTER TABLE upsertion_record ADD COLUMN " + columnSchema.getKey() + " " + columnSchema.getValue();
            try {
                Statement statement = connection.createStatement();
                try {
                    statement.setQueryTimeout(this.QUERY_TIMEOUT);
                    statement.executeUpdate(addMissingColumnQuery);
                    connection.commit();
                    logger.info((Object)("Column " + columnSchema.getKey() + " was successfully added to the record manager"));
                }
                finally {
                    if (statement == null) continue;
                    statement.close();
                }
            }
            catch (SQLException e) {
                logger.error((Object)("Failed to add column " + columnSchema.getKey() + " to the record manager "), (Throwable)e);
            }
        }
        try {
            connection.close();
        }
        catch (SQLException e) {
            logger.error((Object)"Error closing upsertion_record table connection", (Throwable)e);
            throw new RuntimeException(e);
        }
    }

    public void ensureInitialized() {
        File recordManagerFile = new File(this.recordManagerPath);
        if (recordManagerFile.exists()) {
            logger.info((Object)("Using existing record manager: " + this.recordManagerPath));
            this.updateTableSchemaIfNeeded();
        } else {
            logger.info((Object)("Creating a new record manager: " + this.recordManagerPath));
            this.createRecordManagerTable();
        }
    }

    public SQLConnectionProvider.SQLConnectionWrapper getConnection() {
        try {
            return SQLConnectionProvider.newConnection(this.connectionData, (AuthCtx)DSSAuthCtx.newNone(), this.projectKey);
        }
        catch (DKUSecurityException | InterruptedException | SQLException e) {
            throw new RuntimeException("Unable to open a connection to record manager db:", e);
        }
    }

    public void addNewChunksFromDocuments(List<ProcessedDocumentResources> processedDocuments, long upsertionTime, String currentRunId) {
        logger.info((Object)("Adding new record manager entries for " + processedDocuments.size() + " documents "));
        String sql = "INSERT INTO upsertion_record (documentPath, documentModifiedAt, documentSizeMB, chunkUUID, storagePath, securityTokensHash, userMetadataHash, recordLastUpdated, runId, outputPartitionId) VALUES(?,?,?,?,?,?,?,?,?,?)";
        SQLConnectionProvider.SQLConnectionWrapper connection = this.getConnection();
        try (PreparedStatement statement = connection.prepareStatement(sql);){
            statement.setQueryTimeout(this.QUERY_TIMEOUT);
            int opCounter = 0;
            for (ProcessedDocumentResources processedDoc : processedDocuments) {
                for (String chunkUuid : processedDoc.associatedChunksIds) {
                    statement.setString(1, processedDoc.doc.documentRef.filePath);
                    statement.setLong(2, processedDoc.doc.documentRef.fileLastModified);
                    statement.setLong(3, processedDoc.doc.documentRef.fileSize);
                    statement.setString(4, chunkUuid);
                    statement.setString(5, processedDoc.assetsStoragePath != null ? processedDoc.assetsStoragePath : "");
                    statement.setString(6, processedDoc.doc.securityTokenHash);
                    statement.setString(7, processedDoc.doc.userMetadataHash);
                    statement.setLong(8, upsertionTime);
                    statement.setString(9, currentRunId);
                    statement.setString(10, null);
                    statement.addBatch();
                    if (++opCounter % this.BATCH_SIZE != 0) continue;
                    statement.executeBatch();
                }
            }
            if (opCounter % this.BATCH_SIZE != 0) {
                statement.executeBatch();
            }
            connection.commit();
            connection.close();
        }
        catch (SQLException e) {
            logger.error((Object)"An error happened while adding new chunks in record manager", (Throwable)e);
            SQLUtils.unsafeRollbackAndClose(connection);
            throw new RuntimeException(e);
        }
    }

    public void refreshExistingChunksFromDocuments(Set<InputRefs.ManagedFolderDocumentRefWithMetadata> knownToKeepDocuments, long newIndexTime) {
        if (knownToKeepDocuments.isEmpty()) {
            logger.info((Object)"No document to reuse - skipping refresh in record manager.");
            return;
        }
        logger.info((Object)("Refreshing record manager entries from documents " + String.valueOf(knownToKeepDocuments.stream().map(doc -> doc.documentRef.filePath).toList())));
        String query = "UPDATE upsertion_record SET recordLastUpdated = ? WHERE documentPath == ? AND documentModifiedAt == ? AND documentSizeMB == ? AND ((securityTokensHash IS NULL and ? IS NULL) or (securityTokensHash = ?)) AND ((userMetadataHash IS NULL and ? IS NULL) or (userMetadataHash = ?))";
        SQLConnectionProvider.SQLConnectionWrapper connection = this.getConnection();
        try (PreparedStatement statement = connection.prepareStatement(query);){
            statement.setQueryTimeout(this.QUERY_TIMEOUT);
            int opCounter = 0;
            for (InputRefs.ManagedFolderDocumentRefWithMetadata document : knownToKeepDocuments) {
                statement.setLong(1, newIndexTime);
                statement.setString(2, document.documentRef.filePath);
                statement.setLong(3, document.documentRef.fileLastModified);
                statement.setLong(4, document.documentRef.fileSize);
                statement.setString(5, document.securityTokenHash);
                statement.setString(6, document.securityTokenHash);
                statement.setString(7, document.userMetadataHash);
                statement.setString(8, document.userMetadataHash);
                statement.addBatch();
                if (++opCounter % this.BATCH_SIZE != 0) continue;
                statement.executeBatch();
            }
            if (opCounter % this.BATCH_SIZE != 0) {
                statement.executeBatch();
            }
            connection.commit();
            connection.close();
        }
        catch (SQLException e) {
            logger.error((Object)"failed to refresh update time for existing (reused) chunks");
            SQLUtils.unsafeRollbackAndClose(connection);
            throw new RuntimeException(e);
        }
    }

    public Set<String> listExistingChunksFromDocuments(Set<InputRefs.ManagedFolderDocumentRefWithMetadata> documents, long upperBoundTime) {
        HashSet<String> chunks = new HashSet<String>();
        if (documents.isEmpty()) {
            logger.info((Object)"Skipping listing of record manager entries (chunks). The provided document list is empty.");
            return chunks;
        }
        logger.info((Object)("Listing record manager entries (chunks) from documents " + String.valueOf(documents.stream().map(doc -> doc.documentRef.filePath).toList())));
        String query = "SELECT chunkUUID FROM upsertion_record WHERE documentPath == ? AND documentModifiedAt == ? AND documentSizeMB == ? AND ((securityTokensHash IS NULL and ? IS NULL) or (securityTokensHash = ?)) AND ((userMetadataHash IS NULL and ? IS NULL) or (userMetadataHash = ?)) AND recordLastUpdated < ?";
        SQLConnectionProvider.SQLConnectionWrapper connection = this.getConnection();
        try (PreparedStatement statement = connection.prepareStatement(query);){
            statement.setQueryTimeout(this.QUERY_TIMEOUT);
            for (InputRefs.ManagedFolderDocumentRefWithMetadata doc2 : documents) {
                statement.setString(1, doc2.documentRef.filePath);
                statement.setLong(2, doc2.documentRef.fileLastModified);
                statement.setLong(3, doc2.documentRef.fileSize);
                statement.setString(4, doc2.securityTokenHash);
                statement.setString(5, doc2.securityTokenHash);
                statement.setString(6, doc2.userMetadataHash);
                statement.setString(7, doc2.userMetadataHash);
                statement.setLong(8, upperBoundTime);
                ResultSet rs2 = statement.executeQuery();
                while (rs2.next()) {
                    chunks.add(rs2.getString("chunkUUID"));
                }
            }
            connection.close();
        }
        catch (SQLException e) {
            throw new RuntimeException(e);
        }
        return chunks;
    }

    public Set<String> listOutdatedStorageDirs(Set<InputRefs.ManagedFolderDocumentRefWithMetadata> documentsToDelete, long indexingTime) {
        HashSet<String> outdatedStoragePaths = new HashSet<String>();
        if (documentsToDelete.isEmpty()) {
            logger.info((Object)"No document to delete - skipping listing of outdated assets paths from record manager");
            return outdatedStoragePaths;
        }
        logger.info((Object)("Checking if any outdated assets paths from record manager from documents :" + String.valueOf(documentsToDelete.stream().map(doc -> doc.documentRef.filePath).toList())));
        String query = "SELECT DISTINCT storagePath FROM upsertion_record WHERE documentPath == ? AND documentModifiedAt == ? AND documentSizeMB == ? AND ((securityTokensHash IS NULL and ? IS NULL) or (securityTokensHash = ?)) AND ((userMetadataHash IS NULL and ? IS NULL) or (userMetadataHash = ?)) AND recordLastUpdated < ? AND storagePath <> '' AND storagePath not in (SELECT DISTINCT storagePath FROM upsertion_record WHERE recordLastUpdated >= ?)";
        SQLConnectionProvider.SQLConnectionWrapper connection = this.getConnection();
        try (PreparedStatement statement = connection.prepareStatement(query);){
            statement.setQueryTimeout(this.QUERY_TIMEOUT);
            for (InputRefs.ManagedFolderDocumentRefWithMetadata doc2 : documentsToDelete) {
                statement.setString(1, doc2.documentRef.filePath);
                statement.setLong(2, doc2.documentRef.fileLastModified);
                statement.setLong(3, doc2.documentRef.fileSize);
                statement.setString(4, doc2.securityTokenHash);
                statement.setString(5, doc2.securityTokenHash);
                statement.setString(6, doc2.userMetadataHash);
                statement.setString(7, doc2.userMetadataHash);
                statement.setLong(8, indexingTime);
                statement.setLong(9, indexingTime);
                ResultSet rs2 = statement.executeQuery();
                while (rs2.next()) {
                    outdatedStoragePaths.add(rs2.getString("storagePath"));
                }
            }
            connection.close();
        }
        catch (SQLException e) {
            logger.error((Object)"An error happened while listing outdated path from record manager");
            throw new RuntimeException(e);
        }
        return outdatedStoragePaths;
    }

    public void deleteExistingChunksFromDocuments(Set<InputRefs.ManagedFolderDocumentRefWithMetadata> documentsToDelete, long upperBoundTime) {
        if (documentsToDelete.isEmpty()) {
            logger.info((Object)"No document to delete - skipping deletion of chunks in record manager");
            return;
        }
        logger.info((Object)("Deleting record manager entries (chunks) from documents " + String.valueOf(documentsToDelete.stream().map(doc -> doc.documentRef.filePath).toList())));
        String query = "DELETE FROM upsertion_record WHERE documentPath == ? AND documentModifiedAt == ? AND documentSizeMB == ? AND ((securityTokensHash IS NULL and ? IS NULL) or (securityTokensHash = ?)) AND ((userMetadataHash IS NULL and ? IS NULL) or (userMetadataHash = ?)) AND recordLastUpdated < ? ";
        SQLConnectionProvider.SQLConnectionWrapper connection = this.getConnection();
        try (PreparedStatement statement = connection.prepareStatement(query);){
            statement.setQueryTimeout(this.QUERY_TIMEOUT);
            int opCounter = 0;
            for (InputRefs.ManagedFolderDocumentRefWithMetadata doc2 : documentsToDelete) {
                statement.setString(1, doc2.documentRef.filePath);
                statement.setLong(2, doc2.documentRef.fileLastModified);
                statement.setLong(3, doc2.documentRef.fileSize);
                statement.setString(4, doc2.securityTokenHash);
                statement.setString(5, doc2.securityTokenHash);
                statement.setString(6, doc2.userMetadataHash);
                statement.setString(7, doc2.userMetadataHash);
                statement.setLong(8, upperBoundTime);
                statement.addBatch();
                if (++opCounter % this.BATCH_SIZE != 0) continue;
                statement.executeBatch();
            }
            if (opCounter % this.BATCH_SIZE != 0) {
                statement.executeBatch();
            }
            connection.commit();
            connection.close();
        }
        catch (SQLException e) {
            logger.error((Object)"An error happened while deleting outdated chunks from record manager");
            SQLUtils.unsafeRollbackAndClose(connection);
            throw new RuntimeException(e);
        }
    }

    public Set<InputRefs.ManagedFolderDocumentRefWithMetadata> listKnownDocuments() {
        logger.info((Object)"Listing record manager documents");
        HashSet<InputRefs.ManagedFolderDocumentRefWithMetadata> recordManagerDocuments = new HashSet<InputRefs.ManagedFolderDocumentRefWithMetadata>();
        SQLConnectionProvider.SQLConnectionWrapper connection = this.getConnection();
        try (Statement statement = connection.createStatement();){
            statement.setQueryTimeout(this.QUERY_TIMEOUT);
            String query = "select DISTINCT documentPath, documentModifiedAt, documentSizeMB, securityTokensHash, userMetadataHash from upsertion_record";
            ResultSet rs2 = statement.executeQuery(query);
            while (rs2.next()) {
                InputRefs.ManagedFolderDocumentRef reconstructedDocRef = new InputRefs.ManagedFolderDocumentRef(null, new FSPath(rs2.getString("documentPath"), rs2.getLong("documentSizeMB"), rs2.getLong("documentModifiedAt")));
                recordManagerDocuments.add(new InputRefs.ManagedFolderDocumentRefWithMetadata(reconstructedDocRef, rs2.getString("securityTokensHash"), rs2.getString("userMetadataHash")));
            }
            connection.close();
        }
        catch (SQLException e) {
            logger.error((Object)"An error happened while listing known documents from record manager");
            throw new RuntimeException(e);
        }
        logger.info((Object)("Found " + recordManagerDocuments.size() + " record manager known documents"));
        return recordManagerDocuments;
    }

    public Integer getNumberOfChunks() {
        Integer numberOfChunks = null;
        logger.info((Object)"Fetching number of chunks (rows)");
        String query = "SELECT COUNT(*) FROM upsertion_record";
        SQLConnectionProvider.SQLConnectionWrapper connection = this.getConnection();
        try (Statement statement = connection.createStatement();){
            statement.setQueryTimeout(this.QUERY_TIMEOUT);
            ResultSet rs2 = statement.executeQuery(query);
            rs2.next();
            numberOfChunks = rs2.getInt(1);
            connection.close();
        }
        catch (SQLException e) {
            logger.error((Object)"Unable to compute KB status - nbChunks:", (Throwable)e);
            throw new RuntimeException(e);
        }
        return numberOfChunks;
    }

    public Integer getNumberOfDocuments() {
        Integer numberOfDocuments = null;
        logger.info((Object)"Fetching number of source documents");
        String query = "SELECT COUNT(*) FROM (SELECT DISTINCT documentPath, runId FROM upsertion_record)";
        SQLConnectionProvider.SQLConnectionWrapper connection = this.getConnection();
        try (Statement statement = connection.createStatement();){
            statement.setQueryTimeout(this.QUERY_TIMEOUT);
            ResultSet rs2 = statement.executeQuery(query);
            rs2.next();
            numberOfDocuments = rs2.getInt(1);
        }
        catch (SQLException e) {
            logger.error((Object)"Unable to compute KB status - nbDocuments:", (Throwable)e);
            throw new RuntimeException(e);
        }
        return numberOfDocuments;
    }

    protected int setQueryTimeout() {
        return ApplicationConfigurator.getParams().getIntParam("dku.llm.embedDocuments.recordManagerQueryTimeout", Integer.valueOf(30));
    }

    protected int setBatchSize() {
        return ApplicationConfigurator.getParams().getIntParam("dku.llm.embedDocuments.recordManagerBatchSize", Integer.valueOf(100));
    }

    public void flagDuplicatedChunksToRemove() {
        logger.info((Object)"Flagging any duplicated versions in record manager to be removed");
        String uniqueVersionDocs = "(SELECT documentPath, documentModifiedAt, documentSizeMB, securityTokensHash, userMetadataHash, max(runId) as runId FROM upsertion_record group by documentPath, documentModifiedAt, documentSizeMB, securityTokensHash, userMetadataHash)";
        String selectKeptChunks = "(SELECT chunkUUID FROM upsertion_record inner join " + uniqueVersionDocs + " as keptVersions on keptVersions.runId=upsertion_record.runId AND keptVersions.documentPath=upsertion_record.documentPath AND keptVersions.documentModifiedAt=upsertion_record.documentModifiedAt AND keptVersions.documentSizeMB=upsertion_record.documentSizeMB AND ((keptVersions.securityTokensHash IS NULL AND upsertion_record.securityTokensHash IS NULL) OR (keptVersions.securityTokensHash = upsertion_record.securityTokensHash)) AND ((keptVersions.userMetadataHash IS NULL AND upsertion_record.userMetadataHash IS NULL) OR (keptVersions.userMetadataHash = upsertion_record.userMetadataHash)))";
        String finalQuery = "UPDATE upsertion_record SET recordLastUpdated = -1 where chunkUUID not in " + selectKeptChunks;
        SQLConnectionProvider.SQLConnectionWrapper connection = this.getConnection();
        try (PreparedStatement statement = connection.prepareStatement(finalQuery);){
            statement.setQueryTimeout(this.QUERY_TIMEOUT);
            statement.executeUpdate();
            connection.commit();
            connection.close();
        }
        catch (SQLException e) {
            logger.error((Object)"failed to flag possible duplicated chunks");
            SQLUtils.unsafeRollbackAndClose(connection);
            throw new RuntimeException(e);
        }
    }

    public Set<String> listDuplicatedChunksToDelete() {
        HashSet<String> chunks = new HashSet<String>();
        logger.info((Object)"Listing duplicated record manager entries (chunks) to delete");
        String query = "SELECT chunkUUID FROM upsertion_record WHERE recordLastUpdated = -1";
        SQLConnectionProvider.SQLConnectionWrapper connection = this.getConnection();
        try (PreparedStatement statement = connection.prepareStatement(query);){
            statement.setQueryTimeout(this.QUERY_TIMEOUT);
            ResultSet rs2 = statement.executeQuery();
            while (rs2.next()) {
                chunks.add(rs2.getString("chunkUUID"));
            }
            connection.close();
        }
        catch (SQLException e) {
            throw new RuntimeException(e);
        }
        return chunks;
    }

    public void removeDuplicatedChunks() {
        logger.info((Object)"Removing duplicated record manager entries that could have been added from past runs");
        String query = "DELETE FROM upsertion_record WHERE recordLastUpdated = -1";
        SQLConnectionProvider.SQLConnectionWrapper connection = this.getConnection();
        try (PreparedStatement statement = connection.prepareStatement(query);){
            statement.setQueryTimeout(this.QUERY_TIMEOUT);
            statement.executeUpdate();
            connection.commit();
            connection.close();
        }
        catch (SQLException e) {
            logger.error((Object)"An error happened while deleting duplicated chunks from record manager");
            SQLUtils.unsafeRollbackAndClose(connection);
            throw new RuntimeException(e);
        }
    }
}

