/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.analysis.ml.clustering.flow;

import com.dataiku.dip.analysis.ml.FullModelId;
import com.dataiku.dip.analysis.ml.MLFlowUtils;
import com.dataiku.dip.analysis.ml.MLPaths;
import com.dataiku.dip.analysis.ml.ScoringRecipeUtils;
import com.dataiku.dip.analysis.ml.clustering.ClusteringResultsReader;
import com.dataiku.dip.analysis.ml.clustering.flow.ClusteringScoringRecipePayloadParams;
import com.dataiku.dip.analysis.ml.prediction.flow.AbstractScoringRecipeRunner;
import com.dataiku.dip.analysis.model.MLTask;
import com.dataiku.dip.analysis.model.clustering.ClusteringModelDetails;
import com.dataiku.dip.analysis.model.core.ResolvedCoreParams;
import com.dataiku.dip.cluster.SparkSettings;
import com.dataiku.dip.code.CodeEnvModel;
import com.dataiku.dip.containers.exec.ContainerExecConfigSelector;
import com.dataiku.dip.containers.exec.ContainerExecRuntimeConfig;
import com.dataiku.dip.containers.exec.ContainerExecSelection;
import com.dataiku.dip.containers.exec.KubernetesExecUtils;
import com.dataiku.dip.coremodel.Dataset;
import com.dataiku.dip.coremodel.InfoMessage;
import com.dataiku.dip.coremodel.Schema;
import com.dataiku.dip.coremodel.SimpleKeyValue;
import com.dataiku.dip.dao.SavedModel;
import com.dataiku.dip.dataflow.JobActivity;
import com.dataiku.dip.dataflow.exec.AbstractPythonRecipeRunner;
import com.dataiku.dip.dataflow.exec.AbstractSparkBasedRecipeRunner;
import com.dataiku.dip.dataflow.exec.ContainerRecipeParams;
import com.dataiku.dip.dataflow.exec.SparkExecutionEnginesHelper;
import com.dataiku.dip.dataflow.graph.FlowDataset;
import com.dataiku.dip.dataflow.graph.FlowSavedModel;
import com.dataiku.dip.dataflow.jobrunner.JobContext;
import com.dataiku.dip.dataflow.utils.FlowJobUtils;
import com.dataiku.dip.recipes.InitializableAbortableRecipeRunner;
import com.dataiku.dip.recipes.code.spark.SparkRecipeUtils;
import com.dataiku.dip.recipes.consistency.RecipeCodes;
import com.dataiku.dip.remoterun.RemoteRunsRegistry;
import com.dataiku.dip.security.impersonation.FilesystemACLUtils;
import com.dataiku.dip.server.SpringUtils;
import com.dataiku.dip.server.datasets.DatasetAccessService;
import com.dataiku.dip.server.services.SingleWriteTransactionTransactionService;
import com.dataiku.dip.shaker.model.SerializedShakerScript;
import com.dataiku.dip.spark.SparkJob;
import com.dataiku.dip.spark.SparkJobHelper;
import com.dataiku.dip.spark.SparkOverrideConfig;
import com.dataiku.dip.util.AnyLoc;
import com.dataiku.dip.util.AutoDelete;
import com.dataiku.dip.utils.CollectionUtils;
import com.dataiku.dip.utils.DKULogger;
import com.dataiku.dip.utils.ErrorContext;
import com.dataiku.dip.utils.JSON;
import com.dataiku.dip.utils.NotImplementedException;
import com.google.common.collect.Lists;
import com.google.gson.JsonObject;
import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;

/**
 * Recipe runner that scores an input dataset with the active version of a saved clustering model.
 *
 * <p>{@link #run()} resolves the input/output datasets and the saved model, computes the schema to
 * use for the prepared scoring input, and then delegates the actual execution to a backend-specific
 * runner selected from {@code desc.backendType}:
 * <ul>
 *   <li>{@code PY_MEMORY}: a Python runner, executed locally, in Docker or in Kubernetes depending
 *       on the selected container configuration;</li>
 *   <li>{@code H2O} / {@code MLLIB}: a Spark runner.</li>
 * </ul>
 *
 * <p>{@link #setPayload(String)} must have been called before {@link #run()}, since the whole
 * execution is driven by the parsed payload.
 */
public class ClusteringScoringRecipeRunner
extends AbstractScoringRecipeRunner {
    private static final DKULogger logger = DKULogger.getLogger("dku.recipes.clustering.scoring");

    @Autowired
    private DatasetAccessService datasetAccessService;

    /** Recipe payload, parsed by {@link #setPayload(String)}. */
    private ClusteringScoringRecipePayloadParams desc;

    public ClusteringScoringRecipeRunner(JobActivity activity) {
        super(activity);
    }

    /**
     * Parses the JSON recipe payload into {@link ClusteringScoringRecipePayloadParams}.
     *
     * @param payload JSON text of the recipe payload
     */
    @Override
    public void setPayload(String payload) {
        this.desc = (ClusteringScoringRecipePayloadParams)JSON.parse(payload, ClusteringScoringRecipePayloadParams.class);
    }

    /**
     * Autowires the resource gatherer and captures the current authentication context.
     * ({@code gatherer}, {@code authCtx} and {@code authCtxService} are not declared in this
     * class; presumably inherited from the superclass.)
     */
    @Override
    public void init() {
        SpringUtils.getInstance().autowire((Object)this.gatherer);
        this.authCtx = this.authCtxService.getAuthCtx();
    }

    /**
     * Builds the runner matching {@code desc.backendType}.
     *
     * @param preparationOSchemaToUse schema written to {@code preparation_output_schema.json} for the scoring process
     * @param clusteringContainerConfig container configuration for Python scoring, or {@code null} for local execution
     * @param inputDatasetSmartName smart name of the dataset to score
     * @param outputDatasetSmartName smart name of the dataset receiving the scored rows
     * @param script preparation script read from the model folder
     * @param fmi identifier of the active saved model version
     * @param activeModelFolder folder of the active saved model version
     * @param containerSelection container selection of the recipe; only used to warn when it is ignored by Spark scoring
     * @return the backend-specific runner, not yet started
     * @throws NotImplementedException if the backend type is not one of PY_MEMORY, H2O, MLLIB
     */
    private InitializableAbortableRecipeRunner createRunner(final Schema preparationOSchemaToUse, final ContainerExecRuntimeConfig clusteringContainerConfig, final String inputDatasetSmartName, final String outputDatasetSmartName, final SerializedShakerScript script, final FullModelId fmi, final File activeModelFolder, ContainerExecSelection containerSelection) throws Exception {
        switch (this.desc.backendType) {
            case PY_MEMORY:
                return this.createPythonRunner(preparationOSchemaToUse, clusteringContainerConfig, inputDatasetSmartName, outputDatasetSmartName, script, fmi, activeModelFolder);
            case H2O:
            case MLLIB:
                return this.createSparkRunner(preparationOSchemaToUse, inputDatasetSmartName, outputDatasetSmartName, script, activeModelFolder, containerSelection);
            default:
                throw new NotImplementedException("Unsupported backend type: " + this.desc.backendType);
        }
    }

    /**
     * Builds the Python (in-memory) scoring runner. Creates the job log locations and marks the
     * activity engine as "DSS" before returning the runner.
     */
    private InitializableAbortableRecipeRunner createPythonRunner(final Schema preparationOSchemaToUse, final ContainerExecRuntimeConfig clusteringContainerConfig, final String inputDatasetSmartName, final String outputDatasetSmartName, final SerializedShakerScript script, final FullModelId fmi, final File activeModelFolder) throws Exception {
        final File additionalLogsDir = FlowJobUtils.getJobMadeDir("clustering-recipe", "additional-logs");
        final File mainLogFile = FlowJobUtils.getJobTouchedFile("clustering-recipe", "python.log");
        JobContext.getCurrentActivitySummary().engineType = "DSS";
        return new AbstractPythonRecipeRunner(this.activity){

            /**
             * Writes the scoring inputs to a temporary folder, grants filesystem ACLs on the
             * model and temporary folders, then runs the Python scoring module either locally
             * (no container configuration) or in the configured container.
             */
            @Override
            public void run() throws Exception {
                try (AutoDelete outputTmpDir = FlowJobUtils.getTmpFolder("clustering-recipe", "pyrun")) {
                    File descFile = new File((File)outputTmpDir, "desc.json");
                    File scriptFile = new File((File)outputTmpDir, "script.json");
                    File schemaFile = new File((File)outputTmpDir, "preparation_output_schema.json");
                    JSON.prettyToFile((Object)ClusteringScoringRecipeRunner.this.desc, descFile);
                    JSON.prettyToFile((Object)script, scriptFile);
                    JSON.prettyToFile((Object)preparationOSchemaToUse, schemaFile);

                    FilesystemACLUtils.grantFSReadACLs(ClusteringScoringRecipeRunner.this.authCtx, this.projectKey, fmi.getFolderEnsuringSecurity());
                    FilesystemACLUtils.grantFSFullACLs(ClusteringScoringRecipeRunner.this.authCtx, this.projectKey, activeModelFolder);
                    FilesystemACLUtils.grantFSFullACLs(ClusteringScoringRecipeRunner.this.authCtx, this.projectKey, new File[]{outputTmpDir});

                    // The code env is the one recorded in the model's resolved core params.
                    ResolvedCoreParams rcp = fmi.getResolvedCoreParams();
                    String envName = rcp.executionParams.envName;
                    CodeEnvModel.UsedCodeEnvRef codeEnvRef = new CodeEnvModel.UsedCodeEnvRef(CodeEnvModel.EnvLang.PYTHON, envName);
                    logger.info((Object)("Run clustering in code env " + StringUtils.defaultIfBlank(envName, "built-in")));

                    if (clusteringContainerConfig == null) {
                        // No container selected: run the scoring module as a local process.
                        this.executeModule(envName, (File)outputTmpDir, "dataiku.doctor.clustering.reg_scoring_recipe", activeModelFolder.getAbsolutePath(), inputDatasetSmartName, outputDatasetSmartName, descFile.getAbsolutePath(), scriptFile.getAbsolutePath(), schemaFile.getAbsolutePath(), fmi.toString());
                    } else {
                        List<String> readablePaths = Collections.singletonList(activeModelFolder.getAbsolutePath());
                        JsonObject names = new JsonObject();
                        names.addProperty("inputDatasetSmartName", inputDatasetSmartName);
                        names.addProperty("outputDatasetSmartName", outputDatasetSmartName);
                        names.addProperty("inputModel", fmi.toString());
                        switch (clusteringContainerConfig.type) {
                            case DOCKER:
                                this.executeDockerCodeRecipe(codeEnvRef, clusteringContainerConfig, activeModelFolder, mainLogFile, outputTmpDir, RemoteRunsRegistry.ExecutionType.RECIPE_CLUSTERING_SCORE_PYTHON, names.toString(), Collections.emptyMap(), readablePaths);
                                break;
                            case KUBERNETES:
                                this.executeKubernetesCodeRecipe(codeEnvRef, clusteringContainerConfig, activeModelFolder, mainLogFile, additionalLogsDir, outputTmpDir, RemoteRunsRegistry.ExecutionType.RECIPE_CLUSTERING_SCORE_PYTHON, names.toString(), Collections.emptyMap(), readablePaths, new KubernetesExecUtils.KubernetesFailureCodeProvider(){

                                    @Override
                                    public InfoMessage.MessageCode codeForOOMKilled() {
                                        return RecipeCodes.ERR_RECIPE_ML_SCORING_K8S_OOM;
                                    }
                                });
                                break;
                            default:
                                // Fail loudly rather than letting the recipe "succeed" without having scored anything.
                                throw new NotImplementedException("Unsupported container execution type: " + clusteringContainerConfig.type);
                        }
                    }
                }
            }

            @Override
            public void init() throws Exception {
            }
        };
    }

    /**
     * Builds the Spark scoring runner used for the H2O and MLLIB backends. Marks the activity
     * engine as "SPARK" and logs a warning when an explicit container configuration was
     * selected, since it is not used for Spark scoring.
     */
    private InitializableAbortableRecipeRunner createSparkRunner(final Schema preparationOSchemaToUse, final String inputDatasetSmartName, final String outputDatasetSmartName, final SerializedShakerScript script, final File activeModelFolder, ContainerExecSelection containerSelection) throws Exception {
        JobContext.getCurrentActivitySummary().engineType = "SPARK";
        if (containerSelection.containerMode == ContainerExecSelection.ContainerExecMode.EXPLICIT_CONTAINER) {
            logger.warnV("Ignoring container configuration %s, not compatible with Spark scoring.", new Object[]{containerSelection.containerConf});
        }
        final String hiveDb = SparkRecipeUtils.getHiveMetastoreDatabase(this.activity, this.datasetsDAO);
        return new AbstractSparkBasedRecipeRunner(this.activity){

            /**
             * Expands the preparation script with the project variables, gathers the resources
             * its steps need, writes the scoring inputs to a temporary folder and submits the
             * Spark scoring job.
             */
            @Override
            public void run() throws Exception {
                try (final AutoDelete outputTmpDir = FlowJobUtils.getTmpFolder("clustering-recipe", "sparkrun")) {
                    SerializedShakerScript expandedScript = script.expandedDeepCopy(this.variablesService.getForProject(this.projectKey));
                    ClusteringScoringRecipeRunner.this.gatherer.gatherAndCompute(ClusteringScoringRecipeRunner.this.authCtx, this.projectKey, expandedScript.steps);

                    JSON.prettyToFile((Object)ClusteringScoringRecipeRunner.this.desc, new File((File)outputTmpDir, "desc.json"));
                    JSON.prettyToFile((Object)expandedScript, new File((File)outputTmpDir, "script.json"));
                    JSON.prettyToFile(ClusteringScoringRecipeRunner.this.gatherer.getResourceMapping(), new File((File)outputTmpDir, "resource_mapping.json"));
                    JSON.prettyToFile((Object)preparationOSchemaToUse, new File((File)outputTmpDir, "preparation_output_schema.json"));

                    this.runSpark("clustering", ClusteringScoringRecipeRunner.this.desc.sparkParams.sparkExecutionEngine, new SparkExecutionEnginesHelper.SparkRecipeJobBuilder(){

                        @Override
                        public <T extends SparkJob> T buildSparkJob(SparkJobHelper<T> helper, File runDir, SparkSettings sparkSettings, List<SimpleKeyValue> effectiveConf) throws Exception {
                            boolean isH2O = ClusteringScoringRecipeRunner.this.desc.backendType == MLTask.BackendType.H2O;
                            return helper.makeClassJobWithNonSecretGlobalFiles("DSS (score): " + activity.id(), effectiveConf, ClusteringScoringRecipeRunner.this.gatherer.getResourceFiles(), isH2O, "com.dataiku.dip.spark.MLLibClusteringScoringJob", activeModelFolder.getAbsolutePath(), recipe.getProjectKey(), inputDatasetSmartName, outputDatasetSmartName, outputTmpDir.getAbsolutePath());
                        }

                        @Override
                        public SparkOverrideConfig getRecipeOverrideConf() {
                            return ClusteringScoringRecipeRunner.this.desc.sparkParams.sparkConf;
                        }

                        @Override
                        public Map<String, String> getContextOverrideConf() {
                            return CollectionUtils.appendableSSMap()
                                .put("spark.dku.ml.preparedDF.storageLevel", ClusteringScoringRecipeRunner.this.desc.sparkParams.sparkPreparedDFStorageLevel)
                                .put("spark.dku.ml.repartitionNonHDFS", String.valueOf(ClusteringScoringRecipeRunner.this.desc.sparkParams.sparkRepartitionNonHDFS))
                                .put("spark.dku.ml.useGlobalMetastore", Boolean.toString(ClusteringScoringRecipeRunner.this.desc.sparkParams.sparkUseGlobalMetastore))
                                .put("spark.dku.ml.hiveDb", StringUtils.defaultIfBlank(hiveDb, ""))
                                .get();
                        }

                        @Override
                        public List<File> getExtraRecursiveFolders() {
                            return Lists.newArrayList(new File[]{activeModelFolder, outputTmpDir});
                        }
                    }, null);
                }
            }

            @Override
            public void init() throws Exception {
            }
        };
    }

    /**
     * Runs the scoring recipe: resolves datasets and the saved model, computes the schema of
     * the prepared scoring input, then creates and starts the backend-specific runner.
     *
     * @throws IllegalArgumentException presumably, via {@code ErrorContext.iae}, when the activity has no source dataset
     * @throws Exception if model metadata output is requested while the input schema already
     *         contains the reserved metadata column, or if any underlying step fails
     */
    @Override
    public void run() throws Exception {
        List<FlowDataset> inputFDS = this.activity.getSubgraph().getSourceDatasets();
        if (inputFDS.isEmpty()) {
            throw ErrorContext.iae("Missing input dataset in scoring recipe");
        }
        Dataset inputDataset = this.getInputDataset("main");
        Dataset outputDataset = this.getOutputDataset("main");

        FlowSavedModel fsm = MLFlowUtils.getSMInput(this.activity);
        SavedModel sm = fsm.getSavedModel();
        MLFlowUtils.checkActiveVersion(sm);
        File activeModelFolder = MLPaths.savedModelVersionFolder(sm, sm.activeVersion);

        // The preparation script is the one stored alongside the active model version.
        SerializedShakerScript script = (SerializedShakerScript)JSON.parseFile(new File(activeModelFolder, "script.json"), SerializedShakerScript.class);
        script.contextProjectKey = sm.projectKey;

        // A non-null name means the input schema already carries the model metadata column.
        String hadMetadataColName = ScoringRecipeUtils.ModelMetadataUtils.schemaIncludesModelMetadata(inputDataset.getSchema());
        if (this.desc.outputModelMetadata && hadMetadataColName != null) {
            throw new Exception("\"" + hadMetadataColName + "\" is a reserved column name for model metadata output");
        }

        // Single identifier of the active model version, shared by every later step.
        FullModelId fmi = new FullModelId(sm.projectKey, sm.id, sm.activeVersion);

        // Schema inference runs with the current single write transaction stashed; it is always
        // restored, even when inference fails.
        Schema inferredPreparationOSchema = null;
        SingleWriteTransactionTransactionService txnService = (SingleWriteTransactionTransactionService)SpringUtils.getBean(SingleWriteTransactionTransactionService.class);
        txnService.stashTheSingleTransaction();
        try {
            inferredPreparationOSchema = MLFlowUtils.getInferredPreparationOutputSchema_NT(sm.projectKey, inputDataset, script, outputDataset.getType(), this.authCtx);
        }
        finally {
            txnService.unstashTheSingleTransaction();
        }

        String recipeProjectKey = this.recipe.getProjectKey();
        String inputDatasetSmartName = AnyLoc.resolveFull(inputDataset.getFullName()).getSmartName(recipeProjectKey);
        String outputDatasetSmartName = AnyLoc.resolveFull(outputDataset.getFullName()).getSmartName(recipeProjectKey);

        ContainerExecSelection containerSelection = this.recipe.getModel().getParamsAs(ContainerRecipeParams.class).getContainerSelection();
        ContainerExecRuntimeConfig clusteringContainerConfig = new ContainerExecConfigSelector().selectForML_autoTXN(this.authCtx, this.recipe.getProjectKey(), containerSelection, this.desc.backendType);

        logger.info((Object)"Adapting inferred output schema from training schema to avoid inconsistencies");
        ClusteringModelDetails details = ClusteringResultsReader.makeDetails(fmi);
        Schema preparationOSchemaToUse = MLFlowUtils.getSchemaToUseForPreparedScoringInput(details.preprocessing, details.splitDesc.schema, inferredPreparationOSchema, false, false, this.activity);

        InitializableAbortableRecipeRunner runner = this.createRunner(preparationOSchemaToUse, clusteringContainerConfig, inputDatasetSmartName, outputDatasetSmartName, script, fmi, activeModelFolder, containerSelection);
        this.startRunner(runner);
    }

    /**
     * Looks up the dataset bound to the given single-input role of the recipe.
     *
     * @param role input role name
     * @return the dataset, as returned by {@code DatasetAccessService.getMandatory}
     */
    private Dataset getInputDataset(String role) throws IOException {
        return this.datasetAccessService.getMandatory(this.recipe.getModel().getSingleInput(role).getLoc(this.recipe.getProjectKey()));
    }

    /**
     * Looks up the dataset bound to the given single-output role of the recipe.
     *
     * @param role output role name
     * @return the dataset, as returned by {@code DatasetAccessService.getMandatory}
     */
    private Dataset getOutputDataset(String role) throws IOException {
        return this.datasetAccessService.getMandatory(this.recipe.getModel().getSingleOutput(role).getLoc(this.recipe.getProjectKey()));
    }
}

