/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.analysis.ml.clustering.extract;

import com.dataiku.dip.analysis.ml.MLTaskLoc;
import com.dataiku.dip.analysis.ml.prediction.split.SplitDesc;
import com.dataiku.dip.analysis.ml.prediction.split.SplitGenerator;
import com.dataiku.dip.analysis.ml.prediction.split.SplitUtils;
import com.dataiku.dip.analysis.model.clustering.ClusteringMLTask;
import com.dataiku.dip.analysis.model.core.AnalysisCoreParams;
import com.dataiku.dip.coremodel.Dataset;
import com.dataiku.dip.coremodel.Schema;
import com.dataiku.dip.datalayer.ColumnFactory;
import com.dataiku.dip.datalayer.ProcessorOutput;
import com.dataiku.dip.datalayer.ProcessorOutputToSIP;
import com.dataiku.dip.datalayer.RowFactory;
import com.dataiku.dip.datalayer.streamimpl.StreamColumnFactory;
import com.dataiku.dip.datalayer.streamimpl.StreamRowFactory;
import com.dataiku.dip.datasets.UniversalSingleThreadPusher;
import com.dataiku.dip.input.utils.CountingProcessorOutput;
import com.dataiku.dip.security.AuthCtx;
import com.dataiku.dip.server.SpringUtils;
import com.dataiku.dip.server.datasets.DatasetAccessService;
import com.dataiku.dip.server.services.TransactionService;
import com.dataiku.dip.shaker.server.ShakerStreamService;
import com.dataiku.dip.transactions.ifaces.Transaction;
import com.dataiku.dip.util.DatasetLocUtils;
import com.dataiku.dip.utils.JSON;
import java.io.File;
import java.io.IOException;
import org.apache.commons.codec.digest.DigestUtils;
import org.springframework.beans.factory.annotation.Autowired;

/**
 * {@link SplitGenerator} used for clustering ML tasks.
 *
 * <p>It does not produce a train/test split: {@link #updateSplitAndSplitDesc} pushes the
 * rows of the source dataset (restricted by the configured selection) through the processor
 * output obtained from {@link ShakerStreamService} for the analysis script, and records the
 * resulting single "full set" file and its row count on the {@link SplitDesc}.
 *
 * <p>The Spring-managed services are injected by an explicit autowire call in the
 * constructor.
 */
public class ClusteringSampleExtractor extends SplitGenerator {
    @Autowired
    private DatasetAccessService datasetAccessService;
    @Autowired
    private ShakerStreamService shakerStreamService;
    @Autowired
    private TransactionService transactionService;

    /** Sampling settings of the clustering task (dataset override, selection, refresher...). */
    private ClusteringMLTask.ClusterSampling params;

    /**
     * Creates an extractor and autowires its service fields.
     *
     * @param taskLoc    location of the ML task, forwarded to the parent generator
     * @param coreParams core analysis parameters, forwarded to the parent generator
     * @param params     clustering sampling settings used by this extractor
     * @param authCtx    authentication context, forwarded to the parent generator
     */
    public ClusteringSampleExtractor(MLTaskLoc taskLoc, AnalysisCoreParams coreParams, ClusteringMLTask.ClusterSampling params, AuthCtx authCtx) {
        super(taskLoc, coreParams, authCtx);
        SpringUtils.getInstance().autowire(this);
        this.params = params;
    }

    /**
     * Resolves the dataset to extract from: the dataset named in the sampling settings when
     * one is set, otherwise the input dataset of the analysis. Both names are resolved
     * relative to the analysis' project key.
     */
    private DatasetLocUtils.DatasetLoc getDatasetLoc() {
        String smartName = this.params.datasetSmartName != null
                ? this.params.datasetSmartName
                : this.coreParams.inputDatasetSmartName;
        return DatasetLocUtils.resolveSmart(this.coreParams.projectKey, smartName);
    }

    /**
     * Fetches the source dataset definition while holding a read transaction.
     *
     * @return the dataset designated by {@link #getDatasetLoc()}
     * @throws IOException if the transaction or the dataset lookup fails with an I/O error
     */
    private Dataset loadSourceDataset() throws IOException {
        try (Transaction ignored = this.transactionService.beginRead()) {
            return this.datasetAccessService.getMandatory(this.getDatasetLoc());
        }
    }

    /**
     * Builds the textual identifier of the extraction policy.
     *
     * <p>The identifier always contains the {@code extractBeforePrepare} flag, the smart name
     * of the source dataset and the selection identifier. The MD5 of the pretty-printed
     * script steps is inserted only when {@code extractBeforePrepare} is false.
     */
    @Override
    public String getPolicyId() {
        boolean beforePrepare = this.params.extractBeforePrepare;
        String policyId = "extractBeforePrepare=" + beforePrepare
                + ",,ds=" + this.getDatasetLoc().getSmartName(this.taskLoc.analysisProjectKey);
        if (!beforePrepare) {
            policyId = policyId + ",script=" + DigestUtils.md5Hex((String) this.scriptStepsPrettyStr);
        }
        return policyId + ",sel=(" + this.params.selection.getIdentifier() + ")";
    }

    /**
     * Computes the instance id expected for the current state of the task.
     *
     * <p>The id is the MD5 of the policy id, the dataset's serialized JSON for semantic
     * comparison, the script steps string and the JSON of the script's analysis column data,
     * followed by {@code "-"} and the {@code instanceIdRefresher} of the sampling settings.
     *
     * @throws IOException if loading the source dataset fails
     */
    @Override
    public String getExpectedInstanceId_NT() throws IOException {
        Dataset sourceDataset = this.loadSourceDataset();
        String fingerprintInput = this.getPolicyId()
                + "-" + sourceDataset.getSerializedJSONForSemanticComparison()
                + "-" + this.scriptStepsStr
                + "-" + JSON.json(this.coreParams.script.analysisColumnData);
        return DigestUtils.md5Hex(fingerprintInput) + "-" + this.params.instanceIdRefresher;
    }

    /**
     * Creates a new split descriptor carrying a deep copy of the sampling settings.
     *
     * @param expectedPolicyId   policy id to store in the descriptor
     * @param expectedInstanceId instance id to store in the descriptor
     * @param schema             schema to store in the descriptor
     */
    @Override
    public SplitDesc initSplitDesc(String expectedPolicyId, String expectedInstanceId, Schema schema) {
        ClusteringMLTask.ClusterSampling paramsCopy = (ClusteringMLTask.ClusterSampling) JSON.deepCopy(this.params);
        return new SplitDesc(expectedPolicyId, expectedInstanceId, paramsCopy, schema);
    }

    /**
     * Writes the full-set file for the given instance id and updates the descriptor.
     *
     * <p>Rows of the source dataset, restricted to the configured selection, are pushed into
     * the processor output built by {@link ShakerStreamService} for the analysis script,
     * which forwards to a counting writer targeting the task's full-set file.
     *
     * @param splitDesc          descriptor to update; its schema is used by the file writer
     * @param expectedInstanceId instance id used to locate the full-set file
     * @return the same {@code splitDesc}, with {@code fullPath} set to the file name and
     *         {@code fullRows} set to the writer's count
     * @throws Exception if loading the dataset, building the pipeline or pushing rows fails
     */
    @Override
    public SplitDesc updateSplitAndSplitDesc(SplitDesc splitDesc, String expectedInstanceId) throws Exception {
        Dataset sourceDataset = this.loadSourceDataset();

        // Row factory is created before the column factory, as in the original sequence.
        RowFactory rowFactory = new StreamRowFactory();
        ColumnFactory columnFactory = new StreamColumnFactory();

        File fullSetFile = SplitUtils.getMlTaskFullSetFile(this.taskLoc, expectedInstanceId);
        CountingProcessorOutput countingWriter = SplitUtils.getWriterToSingleFile(fullSetFile, splitDesc.schema, columnFactory);
        ProcessorOutputToSIP scriptPipeline = this.shakerStreamService.getProcessorOutput(
                this.authCtx, sourceDataset.getProjectKey(), this.coreParams.script, countingWriter, columnFactory, rowFactory);

        UniversalSingleThreadPusher pusher = new UniversalSingleThreadPusher(
                this.authCtx, sourceDataset, scriptPipeline, columnFactory, rowFactory);
        pusher.setDatasetSelection(this.params.selection);
        pusher.push();

        // Signal end of stream on the writer before reading its row count.
        countingWriter.lastRowEmitted();

        splitDesc.fullPath = fullSetFile.getName();
        splitDesc.fullRows = countingWriter.getCount();
        return splitDesc;
    }
}

