/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.datasets;

import com.dataiku.dip.coremodel.Dataset;
import com.dataiku.dip.coremodel.Schema;
import com.dataiku.dip.coremodel.SerializedDataset;
import com.dataiku.dip.dao.GeneralSettingsDAO;
import com.dataiku.dip.datalayer.memimpl.MemColumn;
import com.dataiku.dip.datalayer.memimpl.MemRow;
import com.dataiku.dip.datalayer.memimpl.MemTable;
import com.dataiku.dip.datalayer.utils.SchemaComparator;
import com.dataiku.dip.datasets.DatasetSelectionToMemTable;
import com.dataiku.dip.futures.FuturePayload;
import com.dataiku.dip.futures.FutureResponse;
import com.dataiku.dip.futures.FutureService;
import com.dataiku.dip.futures.FutureThread;
import com.dataiku.dip.partitioning.PartitionFactory;
import com.dataiku.dip.scheduler.scenarios.ScenarioRunContext;
import com.dataiku.dip.scheduler.steps.FlowComputableSpecification;
import com.dataiku.dip.scheduler.steps.StepRun;
import com.dataiku.dip.security.AuthCtx;
import com.dataiku.dip.security.DSSAuthCtx;
import com.dataiku.dip.server.SpringUtils;
import com.dataiku.dip.server.datasets.DatasetAccessService;
import com.dataiku.dip.server.services.ProjectsDAO;
import com.dataiku.dip.server.services.TransactionService;
import com.dataiku.dip.shaker.SampleBuilder;
import com.dataiku.dip.shaker.model.SerializedShakerScript;
import com.dataiku.dip.shaker.model.ShakerSamplingUtils;
import com.dataiku.dip.transactions.ifaces.Transaction;
import com.dataiku.dip.utils.DKULogger;
import com.dataiku.dip.utils.JSON;
import com.dataiku.dip.utils.Pair;
import com.dataiku.dip.warnings.WarningsContext;
import com.dataiku.dss.shadelib.javax.annotation.Nonnull;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import com.google.gson.JsonDeserializationContext;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParseException;
import com.google.gson.JsonSerializationContext;
import com.google.gson.reflect.TypeToken;
import java.io.IOException;
import java.io.Serializable;
import java.lang.reflect.Type;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import javax.annotation.Nullable;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang3.SerializationUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

@Service
public class DatasetComparisonService {
    // Collaborators, all supplied through the constructor.
    // Resolves dataset locations into Dataset objects (see datasetsAreEqual).
    private final DatasetAccessService datasetAccessService;
    // Opens the read transaction under which datasets and limits are loaded.
    private final TransactionService transactionService;
    // Runs the content comparison as a future and waits for its final response.
    private final FutureService futureService;
    // Source of the per-project memory sample limit (see getMaxSampleBytes).
    private final ProjectsDAO projectsDAO;
    // Source of the instance-wide memory sample limit (see getMaxSampleBytes).
    private final GeneralSettingsDAO generalSettingsDAO;
    // Provides the current StepRun that is handed over to the comparison future thread.
    private final ScenarioRunContext scenarioRunContext;
    // Size limit applied when a mismatching row is dumped into a difference message
    // (findTableDifferences hands this value to MemRow.dumpToString). The exact unit is
    // defined by dumpToString, which is not visible in this file.
    private static final int DUMPED_ROW_SIZE_LIMIT = 100;
    // Logger shared by the service and its nested future thread.
    private static final DKULogger logger = DKULogger.getLogger((String)"dip.datasets.datasetComparisonService");

    /**
     * Creates the service from its injected collaborators.
     *
     * @param datasetAccessService used to resolve the datasets to compare
     * @param transactionService used to open the read transaction around dataset loading
     * @param futureService used to run the content comparison as a future
     * @param projectsDAO used to read the project-level memory sample limit
     * @param generalSettingsDAO used to read the instance-level memory sample limit
     * @param scenarioRunContext used to obtain the current step run
     */
    public DatasetComparisonService(
            @Autowired DatasetAccessService datasetAccessService,
            @Autowired TransactionService transactionService,
            @Autowired FutureService futureService,
            @Autowired ProjectsDAO projectsDAO,
            @Autowired GeneralSettingsDAO generalSettingsDAO,
            @Autowired ScenarioRunContext scenarioRunContext) {
        // Data access.
        this.datasetAccessService = datasetAccessService;
        this.transactionService = transactionService;
        // Settings lookups.
        this.projectsDAO = projectsDAO;
        this.generalSettingsDAO = generalSettingsDAO;
        // Asynchronous execution and scenario context.
        this.futureService = futureService;
        this.scenarioRunContext = scenarioRunContext;
    }

    /**
     * Checks whether the schemas of two datasets are compatible, optionally restricted to a
     * subset of columns.
     *
     * <p>When {@code equalityColumns} is given, both schemas are first filtered down to those
     * columns; if either filtering raises an {@link IllegalArgumentException}, a warning is
     * logged and the schemas are reported as not equal. The outcome (and, on mismatch, the list
     * of incompatibilities) is logged.
     *
     * @param dataset1 first dataset
     * @param dataset2 second dataset
     * @param contextProjectKey project key used to compute the dataset names shown in the logs
     * @param equalityColumns columns to restrict the comparison to, or {@code null} for all columns
     * @return {@code true} when {@code SchemaComparator} reports no incompatibility
     */
    boolean schemasAreEqual(@Nonnull Dataset dataset1, @Nonnull Dataset dataset2, @Nonnull String contextProjectKey, @Nullable List<String> equalityColumns) {
        String name1 = dataset1.getSmartName(contextProjectKey);
        String name2 = dataset2.getSmartName(contextProjectKey);
        String startMessage = String.format("Comparing the schemas of datasets %s and %s", name1, name2);
        logger.info((Object)this.adjustMessageWithEqualityColumns(startMessage, equalityColumns));

        Schema comparedSchema1 = dataset1.getSchema();
        Schema comparedSchema2 = dataset2.getSchema();
        if (equalityColumns != null) {
            String filteringErrorTemplate = "Error while restricting the schema of dataset %s to the equality columns %s: %s";
            try {
                comparedSchema1 = comparedSchema1.filter(equalityColumns);
            }
            catch (IllegalArgumentException filteringError) {
                logger.warnV(filteringErrorTemplate, new Object[]{name1, equalityColumns, filteringError.getMessage()});
                return false;
            }
            try {
                comparedSchema2 = comparedSchema2.filter(equalityColumns);
            }
            catch (IllegalArgumentException filteringError) {
                logger.warnV(filteringErrorTemplate, new Object[]{name2, equalityColumns, filteringError.getMessage()});
                return false;
            }
        }

        List<String> incompatibilities = SchemaComparator.findIncompatibilities(comparedSchema1, comparedSchema2, true);
        String outcomePrefix = this.adjustMessageWithEqualityColumns(
                String.format("The schemas of datasets %s and %s", name1, name2), equalityColumns);

        boolean compatible = incompatibilities.isEmpty();
        if (compatible) {
            logger.infoV("%s are compatible", new Object[]{outcomePrefix});
        } else {
            logger.infoV("%s are incompatible, for the following reasons: %s", new Object[]{outcomePrefix, incompatibilities});
            // The debug dumps show the full (unfiltered) schemas, re-read from the datasets.
            logger.debugV("The first dataset has schema: %s", new Object[]{dataset1.getSchema().prettyPrint(2)});
            logger.debugV("The second dataset has schema: %s", new Object[]{dataset2.getSchema().prettyPrint(2)});
        }
        return compatible;
    }

    /**
     * Compares two datasets: first their schemas, then (optionally) their contents.
     *
     * <p>Both datasets and the applicable memory sample limit are loaded under a read
     * transaction. If the schemas are not compatible the method returns {@code false} without
     * looking at the data. The content comparison itself runs in a
     * {@code CompareDatasetContentsFutureThread} whose final response is awaited.
     *
     * @param authCtx authentication context that owns the comparison future
     * @param contextProjectKey project key used to resolve the dataset specifications and to
     *        read the project-level sample limit
     * @param datasetSpec1 specification (location and optional partition) of the first dataset
     * @param datasetSpec2 specification (location and optional partition) of the second dataset
     * @param equalityColumns columns to restrict the comparison to, or {@code null} for all columns
     * @param contentComparison when {@code false}, only the schemas are compared
     * @return {@code true} when the datasets are considered equal
     * @throws IllegalStateException if the content comparison finished without a boolean result
     * @throws Exception if loading the datasets or running the comparison fails
     */
    public boolean datasetsAreEqual(@Nonnull DSSAuthCtx authCtx, @Nonnull String contextProjectKey, @Nonnull FlowComputableSpecification datasetSpec1, @Nonnull FlowComputableSpecification datasetSpec2, @Nullable List<String> equalityColumns, boolean contentComparison) throws Exception {
        long maxSampleBytes;
        Dataset dataset2;
        Dataset dataset1;
        try (Transaction t = this.transactionService.beginRead();){
            dataset1 = this.datasetAccessService.getMandatory(datasetSpec1.getLoc(contextProjectKey));
            dataset2 = this.datasetAccessService.getMandatory(datasetSpec2.getLoc(contextProjectKey));
            maxSampleBytes = this.getMaxSampleBytes(contextProjectKey);
        }
        String name1 = dataset1.getSmartName(contextProjectKey);
        String name2 = dataset2.getSmartName(contextProjectKey);
        String datasetComparisonStartMessage = String.format("Comparing datasets %s and %s", name1, name2);
        logger.info((Object)this.adjustMessageWithEqualityColumns(datasetComparisonStartMessage, equalityColumns));
        if (datasetSpec1.partitionsSpec != null) {
            logger.info((Object)String.format("Comparison will be done on the partition: %s", datasetSpec1.partitionsSpec));
        }
        // Each dataset is sampled with its own partition specification, so report the second
        // one as well whenever it is set and differs from the one already logged above.
        if (datasetSpec2.partitionsSpec != null && !datasetSpec2.partitionsSpec.equals(datasetSpec1.partitionsSpec)) {
            logger.info((Object)String.format("Dataset %s will be compared on the partition: %s", name2, datasetSpec2.partitionsSpec));
        }
        if (!this.schemasAreEqual(dataset1, dataset2, contextProjectKey, equalityColumns)) {
            return false;
        }
        if (!contentComparison) {
            return true;
        }
        CompareDatasetContentsFutureThread futureThread = new CompareDatasetContentsFutureThread(authCtx, dataset1, dataset2, datasetSpec1.partitionsSpec, datasetSpec2.partitionsSpec, contextProjectKey, equalityColumns, maxSampleBytes, this.scenarioRunContext.getStepRun());
        // NOTE(review): FutureResponse is kept raw because the generic signatures of
        // FutureService.runFuture / waitForFinalResponse are not visible from this file.
        FutureResponse future = this.futureService.runFuture(futureThread, 0L, new TypeToken<FutureResponse<Boolean>>(){});
        future = this.futureService.waitForFinalResponse(future);
        Object outcome = future.result;
        // Unboxing a missing result would only raise a bare NullPointerException; fail with an
        // explicit message instead so the cause is visible in the caller's logs.
        if (!(outcome instanceof Boolean)) {
            throw new IllegalStateException(String.format("The comparison of the contents of datasets %s and %s finished without producing a result", name1, name2));
        }
        return (Boolean)outcome;
    }

    /**
     * Lists the differences between two in-memory tables, treating each table as a multiset of
     * rows (row order is ignored, duplicates are counted).
     *
     * <p>If the tables do not have the same number of rows, that single fact is reported and no
     * row-level comparison is made. Otherwise rows are grouped by the hash of their values on
     * the compared columns, and every hash whose number of occurrences differs between the two
     * tables yields one message.
     *
     * @param datasetName1 display name of the first dataset, used in messages
     * @param table1 rows of the first dataset
     * @param datasetName2 display name of the second dataset, used in messages
     * @param table2 rows of the second dataset
     * @param equalityColumns columns to compare, or {@code null} to compare all columns of
     *        {@code table1}
     * @return human-readable difference messages; empty when no difference was found
     */
    @VisibleForTesting
    List<String> findTableDifferences(@Nonnull String datasetName1, @Nonnull MemTable table1, @Nonnull String datasetName2, @Nonnull MemTable table2, @Nullable List<String> equalityColumns) {
        logger.info((Object)this.adjustMessageWithEqualityColumns(
                String.format("Comparing the contents of datasets %s and %s", datasetName1, datasetName2), equalityColumns));

        List<String> differences = new ArrayList<>();
        int rowCount1 = table1.rows.size();
        int rowCount2 = table2.rows.size();
        if (rowCount1 != rowCount2) {
            differences.add(String.format("The two datasets have different sizes: dataset %s has %d rows, dataset %s has %d rows", datasetName1, rowCount1, datasetName2, rowCount2));
            return differences;
        }

        // Without an explicit restriction, compare on every column of the first table.
        final List<String> comparedColumns;
        if (equalityColumns == null) {
            comparedColumns = table1.columnsList.stream().map(MemColumn::getName).collect(Collectors.toList());
        } else {
            comparedColumns = equalityColumns;
        }

        Pair<Multimap<String, Integer>, Map<String, MemRow>> maps1 = this.getTableRowHashMaps(table1, comparedColumns);
        Multimap<String, Integer> positionsByHash1 = maps1.first;
        Map<String, MemRow> sampleRowByHash1 = maps1.second;
        Pair<Multimap<String, Integer>, Map<String, MemRow>> maps2 = this.getTableRowHashMaps(table2, comparedColumns);
        Multimap<String, Integer> positionsByHash2 = maps2.first;
        Map<String, MemRow> sampleRowByHash2 = maps2.second;

        // Pass 1: every hash present in table 1. Each processed hash is removed from both
        // multimaps, so whatever remains in the second one afterwards exists only in table 2.
        for (Iterator<String> hashes1 = positionsByHash1.keySet().iterator(); hashes1.hasNext(); ) {
            String hash = hashes1.next();
            Collection<Integer> positions1 = positionsByHash1.get(hash);
            Collection<Integer> positions2 = positionsByHash2.get(hash);
            if (positions1.size() != positions2.size()) {
                MemRow exampleRow = sampleRowByHash1.get(hash);
                String message = String.format("Row [%s] occurs %d times in dataset %s (at positions %s) but %d times in dataset %s", exampleRow.dumpToString(comparedColumns, DUMPED_ROW_SIZE_LIMIT), positions1.size(), datasetName1, positions1, positions2.size(), datasetName2);
                if (!positions2.isEmpty()) {
                    message = message + String.format(" (at positions %s)", positions2);
                }
                differences.add(message);
            }
            hashes1.remove();
            positionsByHash2.removeAll(hash);
        }

        // Pass 2: hashes that only occur in table 2 (nothing to do when none are left).
        for (String hash : positionsByHash2.keySet()) {
            Collection<Integer> positions2 = positionsByHash2.get(hash);
            MemRow exampleRow = sampleRowByHash2.get(hash);
            differences.add(String.format("Row [%s] occurs %d times in dataset %s (at positions %s) but 0 times in dataset %s", exampleRow.dumpToString(comparedColumns, DUMPED_ROW_SIZE_LIMIT), positions2.size(), datasetName2, positions2, datasetName1));
        }
        return differences;
    }

    /**
     * Computes a hash of a row restricted to the given columns.
     *
     * <p>The values of the columns are collected, in the order of {@code equalityColumns}, into
     * an {@link ArrayList} which is Java-serialized and digested with SHA3-256.
     *
     * @param row row to hash
     * @param equalityColumns names of the columns whose values take part in the hash
     * @return hexadecimal SHA3-256 digest of the serialized value list
     */
    @VisibleForTesting
    String getRowHash(@Nonnull MemRow row, @Nonnull List<String> equalityColumns) {
        ArrayList<Object> comparedValues = new ArrayList<>();
        for (String columnName : equalityColumns) {
            comparedValues.add(row.get(columnName));
        }
        byte[] serializedValues = SerializationUtils.serialize(comparedValues);
        return DigestUtils.sha3_256Hex(serializedValues);
    }

    /**
     * Indexes the rows of a table by their hash on the given columns.
     *
     * @param table table whose rows are indexed
     * @param equalityColumns names of the columns that take part in the row hash
     * @return a pair whose first element maps each row hash to the indices of all rows having
     *         that hash, and whose second element maps each row hash to the first row
     *         encountered with that hash
     */
    @VisibleForTesting
    Pair<Multimap<String, Integer>, Map<String, MemRow>> getTableRowHashMaps(@Nonnull MemTable table, @Nonnull List<String> equalityColumns) {
        Multimap<String, Integer> rowIndicesByHash = HashMultimap.create();
        Map<String, MemRow> firstRowByHash = new HashMap<>();
        int rowIndex = 0;
        while (rowIndex < table.rows.size()) {
            MemRow currentRow = table.rows.get(rowIndex);
            String hash = this.getRowHash(currentRow, equalityColumns);
            rowIndicesByHash.put(hash, rowIndex);
            // Keep only the first row seen for a hash; it serves as the example in messages.
            firstRowByHash.putIfAbsent(hash, currentRow);
            rowIndex++;
        }
        return new Pair<>(rowIndicesByHash, firstRowByHash);
    }

    /**
     * Determines the soft limit to apply to in-memory samples for a project.
     *
     * <p>Combines the soft {@code memSampleBytes} limits of the general settings and of the
     * project settings, where {@code -1} stands for "not set": the smaller one wins when both
     * are set, the only one set is used otherwise, and {@code -1} is returned when neither is.
     *
     * @param projectKey key of the project whose limit settings are read
     * @return the resulting soft limit, or {@code -1} when no limit is configured
     * @throws IOException if the settings cannot be read
     */
    private long getMaxSampleBytes(String projectKey) throws IOException {
        GeneralSettingsDAO.SoftHardLimit instanceLimit = this.generalSettingsDAO.getUnsafe().limits.memSampleBytes;
        GeneralSettingsDAO.SoftHardLimit projectLimit = this.projectsDAO.getMandatoryUnsafe(projectKey).settings.limitsSettings.memSampleBytes;
        long instanceSoft = instanceLimit.soft;
        long projectSoft = projectLimit.soft;

        if (projectSoft == -1L) {
            // Either the instance limit alone applies, or nothing is set (-1).
            return instanceSoft;
        }
        if (instanceSoft == -1L) {
            return projectSoft;
        }
        return Math.min(projectSoft, instanceSoft);
    }

    /**
     * Appends the list of compared columns to a log message when the comparison is restricted.
     *
     * @param baseMessage message to extend
     * @param equalityColumns columns the comparison is restricted to, or {@code null}
     * @return {@code baseMessage} unchanged when {@code equalityColumns} is {@code null},
     *         otherwise {@code baseMessage} followed by the restriction suffix
     */
    private String adjustMessageWithEqualityColumns(@Nonnull String baseMessage, @Nullable List<String> equalityColumns) {
        if (equalityColumns == null) {
            return baseMessage;
        }
        String restrictionSuffix = String.format(" (restricted to columns %s)", equalityColumns);
        return baseMessage + restrictionSuffix;
    }

    /**
     * Future thread that builds an in-memory sample of each dataset and compares the two
     * samples with {@code DatasetComparisonService.findTableDifferences}.
     *
     * <p>A JSON adapter for this class is registered when the class is initialized; it
     * (de)serializes the owner, both datasets, both partition specifications, the context
     * project key, the equality columns, the sample size limit and the step run.
     */
    private static final class CompareDatasetContentsFutureThread
    extends FutureThread<Boolean> {
        /** JSON adapter; the Spring-provided collaborators are not serialized but looked up again by the constructor. */
        private static final JSON.Adapter<CompareDatasetContentsFutureThread> serdeAdapter = new JSON.Adapter<CompareDatasetContentsFutureThread>(){

            public JsonElement serialize(CompareDatasetContentsFutureThread futureThread, Type type, JsonSerializationContext context) {
                JsonObject json = new JsonObject();
                json.add("owner", context.serialize(futureThread.owner));
                json.add("dataset1", context.serialize(futureThread.dataset1.serialize()));
                json.add("dataset2", context.serialize(futureThread.dataset2.serialize()));
                json.add("partitionsSpec1", context.serialize(futureThread.partitionsSpec1));
                json.add("partitionsSpec2", context.serialize(futureThread.partitionsSpec2));
                json.add("contextProjectKey", context.serialize(futureThread.contextProjectKey));
                json.add("equalityColumns", context.serialize(futureThread.equalityColumns));
                json.add("maxSampleBytes", context.serialize(futureThread.maxSampleBytes));
                json.add("currentStepRun", context.serialize(futureThread.currentStepRun));
                return json;
            }

            public CompareDatasetContentsFutureThread deserialize(JsonElement jsonElement, Type type, JsonDeserializationContext context) throws JsonParseException {
                JsonObject json = jsonElement.getAsJsonObject();
                DSSAuthCtx owner = context.deserialize(json.get("owner"), DSSAuthCtx.class);
                SerializedDataset serialized1 = context.deserialize(json.get("dataset1"), SerializedDataset.class);
                Dataset firstDataset = Dataset.fromSerialized(serialized1);
                SerializedDataset serialized2 = context.deserialize(json.get("dataset2"), SerializedDataset.class);
                Dataset secondDataset = Dataset.fromSerialized(serialized2);
                String firstSpec = context.deserialize(json.get("partitionsSpec1"), String.class);
                String secondSpec = context.deserialize(json.get("partitionsSpec2"), String.class);
                String projectKey = context.deserialize(json.get("contextProjectKey"), String.class);
                List<String> columns = context.deserialize(json.get("equalityColumns"), new TypeToken<List<String>>(){}.getType());
                long sampleLimit = context.<Long>deserialize(json.get("maxSampleBytes"), Long.TYPE);
                StepRun stepRun = context.deserialize(json.get("currentStepRun"), StepRun.class);
                return new CompareDatasetContentsFutureThread(owner, firstDataset, secondDataset, firstSpec, secondSpec, projectKey, columns, sampleLimit, stepRun);
            }
        };

        static {
            // Runs after serdeAdapter has been initialized (textual order).
            JSON.registerAdapter(CompareDatasetContentsFutureThread.class, serdeAdapter);
        }

        // Inputs of the comparison.
        private final Dataset dataset1;
        private final Dataset dataset2;
        private final String partitionsSpec1;
        private final String partitionsSpec2;
        private final String contextProjectKey;
        private final List<String> equalityColumns;
        private final long maxSampleBytes;
        private final StepRun currentStepRun;
        // Outcome; stays null until execute() has completed.
        private Boolean result;
        // Collaborators obtained from Spring in the constructor.
        private final DatasetComparisonService datasetComparisonService;
        private final ScenarioRunContext scenarioRunContext;

        /**
         * @param owner authentication context passed to the parent future thread
         * @param dataset1 first dataset to compare
         * @param dataset2 second dataset to compare
         * @param partitionsSpec1 partition specification for the first dataset, may be null
         * @param partitionsSpec2 partition specification for the second dataset, may be null
         * @param contextProjectKey project key used to compute dataset display names
         * @param equalityColumns columns to restrict the comparison to, or null for all columns
         * @param maxSampleBytes maximum number of bytes stored per sample
         * @param currentStepRun step run installed on the scenario run context by execute()
         */
        public CompareDatasetContentsFutureThread(DSSAuthCtx owner, Dataset dataset1, Dataset dataset2, String partitionsSpec1, String partitionsSpec2, String contextProjectKey, List<String> equalityColumns, long maxSampleBytes, StepRun currentStepRun) {
            super(owner);
            this.dataset1 = dataset1;
            this.partitionsSpec1 = partitionsSpec1;
            this.dataset2 = dataset2;
            this.partitionsSpec2 = partitionsSpec2;
            this.contextProjectKey = contextProjectKey;
            this.equalityColumns = equalityColumns;
            this.maxSampleBytes = maxSampleBytes;
            this.currentStepRun = currentStepRun;
            this.datasetComparisonService = (DatasetComparisonService)SpringUtils.getBean(DatasetComparisonService.class);
            this.scenarioRunContext = (ScenarioRunContext)SpringUtils.getBean(ScenarioRunContext.class);
        }

        /** Describes this future with the names of the two datasets being compared. */
        public FuturePayload getPayload() {
            String description = String.format("Comparing the contents of datasets %s and %s", this.dataset1.getSmartName(this.contextProjectKey), this.dataset2.getSmartName(this.contextProjectKey));
            return FuturePayload.newSimple("compare-dataset-contents", description);
        }

        /** Always 1.0; the meaning of the scale is defined by FutureThread, not visible here. */
        public double getDangerosity() {
            return 1.0;
        }

        /** Returns the outcome set by execute(), or null if execute() has not completed. */
        public Boolean getResult() {
            return this.result;
        }

        /**
         * Samples both datasets, compares the samples and stores whether they are identical.
         * Differences are logged at error level, equality at info level.
         */
        public void execute() throws Exception {
            this.scenarioRunContext.setStepRun(this.currentStepRun);
            this.scenarioRunContext.startLogAppender();

            DatasetSelectionToMemTable selection1 = this.getDatasetSelection(this.dataset1, this.partitionsSpec1);
            SampleBuilder.Sample sample1 = CompareDatasetContentsFutureThread.getSample(this.owner, this.dataset1, selection1);
            DatasetSelectionToMemTable selection2 = this.getDatasetSelection(this.dataset2, this.partitionsSpec2);
            SampleBuilder.Sample sample2 = CompareDatasetContentsFutureThread.getSample(this.owner, this.dataset2, selection2);

            String name1 = this.dataset1.getSmartName(this.contextProjectKey);
            String name2 = this.dataset2.getSmartName(this.contextProjectKey);
            List<String> differences = this.datasetComparisonService.findTableDifferences(name1, sample1.data, name2, sample2.data, this.equalityColumns);

            String outcomePrefix = this.datasetComparisonService.adjustMessageWithEqualityColumns(
                    String.format("The contents of datasets %s and %s", name1, name2), this.equalityColumns);
            boolean identical = differences.isEmpty();
            if (identical) {
                logger.infoV("%s are the same", new Object[]{outcomePrefix});
            } else {
                logger.errorV("%s, are different, for the following reasons: %s", new Object[]{outcomePrefix, differences});
            }
            this.result = identical;
        }

        /** Computes the sample of a dataset for the given selection, with sample auto-refresh disabled. */
        private static SampleBuilder.Sample getSample(@Nonnull AuthCtx authCtx, @Nonnull Dataset ds, @Nonnull DatasetSelectionToMemTable selection) throws Exception {
            SerializedShakerScript.ShakerExplorationSampleSettings settings = new SerializedShakerScript.ShakerExplorationSampleSettings()
                    .withSelection(selection)
                    .withAutoRefreshSample(false);
            return SampleBuilder.computeSample(ds, ShakerSamplingUtils.computeSampleId(authCtx, ds, settings), selection, new WarningsContext(), authCtx, false);
        }

        /**
         * Builds a full selection capped at maxSampleBytes, restricted to the given partitions
         * when the dataset is partitioned.
         *
         * @throws IllegalArgumentException if the dataset is partitioned and partitionsSpec is empty
         */
        private DatasetSelectionToMemTable getDatasetSelection(Dataset dataset, String partitionsSpec) {
            DatasetSelectionToMemTable fullSelection = DatasetSelectionToMemTable.newFull().withMaxStoredBytes(this.maxSampleBytes);
            if (dataset.getModel().isPartitioned()) {
                if (StringUtils.isEmpty(partitionsSpec)) {
                    throw new IllegalArgumentException("The dataset is partitioned, but no partition specification was specified");
                }
                return fullSelection.withSelectedPartitions(PartitionFactory.fromPartitionSpec(dataset.getPartitioningSchema(), partitionsSpec));
            }
            return fullSelection;
        }
    }
}

