/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.analysis.ml.prediction.split;

import com.dataiku.dip.analysis.ml.MLTaskLoc;
import com.dataiku.dip.analysis.ml.prediction.split.AbstractSingleDatasetSplitGenerator;
import com.dataiku.dip.analysis.ml.prediction.split.SplitDesc;
import com.dataiku.dip.analysis.ml.prediction.split.SplitUtils;
import com.dataiku.dip.analysis.model.SplitParams;
import com.dataiku.dip.analysis.model.core.AnalysisCoreParams;
import com.dataiku.dip.coremodel.Dataset;
import com.dataiku.dip.datalayer.ColumnFactory;
import com.dataiku.dip.datalayer.ProcessorOutput;
import com.dataiku.dip.datalayer.ProcessorOutputToSIP;
import com.dataiku.dip.datalayer.RowFactory;
import com.dataiku.dip.datalayer.streamimpl.StreamColumnFactory;
import com.dataiku.dip.datalayer.streamimpl.StreamRowFactory;
import com.dataiku.dip.datasets.UniversalSingleThreadPusher;
import com.dataiku.dip.input.utils.CountingProcessorOutput;
import com.dataiku.dip.mec.KernelsModelEvaluationStoresService;
import com.dataiku.dip.security.AuthCtx;
import com.dataiku.dip.server.datasets.DatasetAccessService;
import com.dataiku.dip.server.services.TransactionService;
import com.dataiku.dip.shaker.server.ShakerStreamService;
import com.dataiku.dip.transactions.ifaces.Transaction;
import java.io.File;
import org.apache.commons.codec.digest.DigestUtils;
import org.springframework.beans.factory.annotation.Autowired;

/**
 * Split generator that streams the selected rows of a single dataset into one
 * "full set" file and records that file's name and row count in the
 * {@link SplitDesc}.
 *
 * <p>Collaborating services are field-injected via {@code @Autowired}.
 */
public class SingleDatasetFullSplitGenerator
extends AbstractSingleDatasetSplitGenerator {
    @Autowired
    private ShakerStreamService shakerStreamService;
    @Autowired
    private DatasetAccessService datasetAccessService;
    @Autowired
    private TransactionService transactionService;
    // Not referenced by the code in this class; kept because it is an injected field.
    @Autowired
    private KernelsModelEvaluationStoresService kernelsModelEvaluationStoresService;

    /**
     * Creates the generator; all arguments are forwarded unchanged to the superclass.
     *
     * @param taskLoc    location of the ML task
     * @param coreParams analysis core parameters (its {@code script} is used when streaming rows)
     * @param params     split parameters
     * @param authCtx    authentication context used for dataset access and streaming
     */
    public SingleDatasetFullSplitGenerator(MLTaskLoc taskLoc, AnalysisCoreParams coreParams, SplitParams params, AuthCtx authCtx) {
        super(taskLoc, coreParams, params, authCtx);
    }

    /**
     * Builds a textual identifier from the split parameters.
     *
     * <p>The identifier is a comma-separated list of {@code key=value} pairs covering
     * the policy type, fold settings, dataset name, selection identifier, training
     * ratio and seed, plus optional {@code stratified} / {@code grouped} markers.
     * Unless {@code splitBeforePrepare} is set, the MD5 hex digest of
     * {@code scriptStepsPrettyStr} is appended as a final {@code script=} entry.
     *
     * @return the policy identifier string
     */
    @Override
    public String getPolicyId() {
        String id = "type=" + this.params.ttPolicy;

        if (this.params.streamAll) {
            id += ",streamAll=true,kfold=" + this.params.kfold;
        } else {
            id += ",split=RANDOM_KFOLD";
        }

        id += ",folds=" + this.params.nFolds;
        id += ",splitBeforePrepare=" + this.params.splitBeforePrepare;

        // Fall back to the smart name of the generator's own dataset when no
        // explicit dataset name is configured.
        String datasetName = this.params.ssdDatasetSmartName;
        if (datasetName == null) {
            datasetName = this.getDatasetLoc().getSmartName(this.taskLoc.analysisProjectKey);
        }
        id += ",ds=" + datasetName;

        id += ",sel=(" + this.params.ssdSelection.getIdentifier() + ")";
        id += ",r=" + this.params.ssdTrainingRatio;
        id += ",s=" + this.params.ssdSeed;

        if (this.params.ssdStratified) {
            id += ",stratified=true";
        }
        if (this.params.ssdGrouped) {
            id += ",grouped=true";
            if (this.params.ssdGroupColumnName != null) {
                id += ",groups=" + this.params.ssdGroupColumnName;
            }
        }

        // The script digest is only part of the identifier when the split is
        // NOT performed before preparation.
        if (!this.params.splitBeforePrepare) {
            id += ",script=" + DigestUtils.md5Hex((String)this.scriptStepsPrettyStr);
        }
        return id;
    }

    /**
     * Streams the dataset selection through the processing pipeline into the
     * full-set file and stores the resulting file name and row count on
     * {@code splitDesc}.
     *
     * @param splitDesc          descriptor to update; its {@code schema} is used to create the writer
     * @param expectedInstanceId instance id used to resolve the full-set file
     * @return the same {@code splitDesc} instance, with {@code fullPath} and {@code fullRows} set
     * @throws Exception if the dataset lookup, streaming or writing fails
     */
    @Override
    public SplitDesc updateSplitAndSplitDesc(SplitDesc splitDesc, String expectedInstanceId) throws Exception {
        Dataset sourceDataset = this.loadDataset();

        RowFactory rowFactory = new StreamRowFactory();
        ColumnFactory columnFactory = new StreamColumnFactory();

        File fullSetFile = SplitUtils.getMlTaskFullSetFile(this.taskLoc, expectedInstanceId);
        CountingProcessorOutput fullSetWriter = SplitUtils.getWriterToSingleFile(fullSetFile, splitDesc.schema, columnFactory);

        ProcessorOutputToSIP pipeline = this.shakerStreamService.getProcessorOutput(
                this.authCtx,
                sourceDataset.getProjectKey(),
                this.coreParams.script,
                (ProcessorOutput)fullSetWriter,
                columnFactory,
                rowFactory);

        UniversalSingleThreadPusher pusher = new UniversalSingleThreadPusher(
                this.authCtx,
                sourceDataset,
                (ProcessorOutput)pipeline,
                columnFactory,
                rowFactory);
        pusher.setDatasetSelection(this.params.ssdSelection);
        pusher.push();

        // Signal end of input to the pipeline before reading the row count.
        pipeline.lastRowEmitted();

        splitDesc.fullPath = fullSetFile.getName();
        splitDesc.fullRows = fullSetWriter.getCount();
        return splitDesc;
    }

    /** Looks up the generator's dataset inside a read transaction that is closed before returning. */
    private Dataset loadDataset() throws Exception {
        try (Transaction t = this.transactionService.beginRead()) {
            return this.datasetAccessService.getMandatory(this.getDatasetLoc());
        }
    }
}

