/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.shaker.resources;

import com.dataiku.dip.DatasetDependency;
import com.dataiku.dip.ProcessorWithResourceFiles;
import com.dataiku.dip.code.CodeEnvResolutionService;
import com.dataiku.dip.contribs.PythonContrib;
import com.dataiku.dip.contribs.PythonContribService;
import com.dataiku.dip.coremodel.Dataset;
import com.dataiku.dip.coremodel.Schema;
import com.dataiku.dip.dataflow.utils.FlowJobUtils;
import com.dataiku.dip.datalayer.ColumnFactory;
import com.dataiku.dip.datalayer.ProcessorOutput;
import com.dataiku.dip.datalayer.RowFactory;
import com.dataiku.dip.datalayer.SRPAdapter;
import com.dataiku.dip.datalayer.streamimpl.StreamColumnFactory;
import com.dataiku.dip.datalayer.streamimpl.StreamRowFactory;
import com.dataiku.dip.datasets.StreamableDatasetSelection;
import com.dataiku.dip.datasets.UniversalSingleThreadPusher;
import com.dataiku.dip.exceptions.DKUSecurityException;
import com.dataiku.dip.export.ZipUnzipDir;
import com.dataiku.dip.security.AuthCtx;
import com.dataiku.dip.server.datasets.DatasetAccessService;
import com.dataiku.dip.server.recipes.ShakerRecipeService;
import com.dataiku.dip.shaker.model.ScriptStep;
import com.dataiku.dip.shaker.mrimpl.models.SerializedPythonContribs;
import com.dataiku.dip.shaker.processors.BaseProcessorsFactory;
import com.dataiku.dip.shaker.processors.udf.PythonContribProcessor;
import com.dataiku.dip.shaker.resources.YarnPythonBinResourcesGatherer;
import com.dataiku.dip.shaker.sampleio.SampleWriter;
import com.dataiku.dip.shaker.streamimpl.StreamPipelineFactory;
import com.dataiku.dip.util.AutoDelete;
import com.dataiku.dip.util.DatasetLocUtils;
import com.dataiku.dip.utils.JSON;
import com.google.common.base.Function;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.Maps;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.ZipException;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;

/**
 * Collects the files needed by the steps of a shaker script and derives, for each of them,
 * a mapping from a logical resource id to a bare file name.
 *
 * <p>Typical usage is {@link #gatherAndCompute(AuthCtx, String, List)} followed by the getters.
 * The static helpers {@link #getLocalResourceFiles(Map, List)} and
 * {@link #loadContributedProcessors(Map)} work from a previously computed mapping and do not
 * need an instance.
 *
 * <p>NOTE(review): the instance accumulates state in plain collections without any
 * synchronization; it looks intended for single-threaded, one-shot use - confirm with callers.
 */
public class ResourcesGatherer {
    @Autowired
    private DatasetAccessService datasetAccessService;
    @Autowired
    private CodeEnvResolutionService codeEnvResolutionService;
    /** Resource id -> local file or directory, filled by the gather/dump methods. */
    Map<String, File> allResourceFiles = new HashMap<String, File>();
    /** Resource id -> bare file name, filled by {@link #computeMapping()}. */
    HashMap<String, String> resourceMapping = new HashMap<String, String>();
    /** Files registered by {@link #computeMapping()} (directories are replaced by their zip). */
    List<File> resourceFiles = new ArrayList<File>();
    private static final Logger logger = Logger.getLogger("dku.shaker.resources");

    /**
     * Builds a stream pipeline from the steps (using a fake pipeline context) and records the
     * resource files declared by every processor implementing {@link ProcessorWithResourceFiles},
     * whether it appears directly in the pipeline or wrapped in an {@link SRPAdapter}.
     *
     * @param steps the script steps to inspect
     * @throws Exception if building the pipeline or gathering the requirements fails
     */
    public void gatherResources(List<ScriptStep> steps) throws Exception {
        StreamPipelineFactory.StreamPipeline streamPipeline = StreamPipelineFactory.build(steps, BaseProcessorsFactory.PipelineContext.fakePipelineContext(), false);
        for (Object processor : streamPipeline.allProcessors) {
            this.addRequirementsOf(processor);
            if (processor instanceof SRPAdapter) {
                // The adapter may hide a processor that itself declares resource files.
                this.addRequirementsOf(((SRPAdapter)processor).getProcessor());
            }
        }
    }

    /** Records the requirements of {@code candidate} when it declares resource files; no-op otherwise. */
    private void addRequirementsOf(Object candidate) throws Exception {
        if (candidate instanceof ProcessorWithResourceFiles) {
            this.allResourceFiles.putAll(((ProcessorWithResourceFiles)candidate).gatherRequirements());
        }
    }

    /**
     * Delegates to {@link YarnPythonBinResourcesGatherer#dumpAndGather} and records every
     * resource it returns.
     *
     * @param steps the script steps
     * @param projectKey key of the project the steps belong to
     * @param authCtx authentication context forwarded to the delegate
     * @throws IOException propagated from the delegate
     * @throws DKUSecurityException propagated from the delegate
     */
    public void dumpAndGatherYarnPythonBins(List<ScriptStep> steps, String projectKey, AuthCtx authCtx) throws IOException, DKUSecurityException {
        this.allResourceFiles.putAll(new YarnPythonBinResourcesGatherer(this.codeEnvResolutionService).dumpAndGather(steps, projectKey, authCtx, logger));
    }

    /**
     * Serializes every additional dataset the steps depend on to a job temp file and records it
     * under the resource id {@code "dku.shaker.dependency." + smartName}.
     *
     * <p>Dependencies are merged first so that each dataset is dumped once. When a dependency
     * lists column names, the dataset schema is filtered down to those columns before writing.
     *
     * @param authCtx authentication context used to read the datasets
     * @param recipeProjectKey project key used to resolve dataset smart names
     * @param steps the script steps
     * @throws Exception if a dataset cannot be resolved, read or written
     */
    public void dumpAndGatherAdditionalDatasets(AuthCtx authCtx, String recipeProjectKey, List<ScriptStep> steps) throws Exception {
        List<ShakerRecipeService.StepDependency> additionalDeps = ShakerRecipeService.determineAdditionalDependencies(steps);
        List<DatasetDependency> rawDeps = FluentIterable.from(additionalDeps).transform(new Function<ShakerRecipeService.StepDependency, DatasetDependency>(){

            @Override
            public DatasetDependency apply(ShakerRecipeService.StepDependency stepDependency) {
                return stepDependency.dependency;
            }
        }).toList();
        List<DatasetDependency> deduplicatedDeps = ShakerRecipeService.mergeDatasetDependencies(rawDeps);
        for (DatasetDependency dep : deduplicatedDeps) {
            DatasetLocUtils.DatasetLoc loc = DatasetLocUtils.resolveSmart(recipeProjectKey, dep.datasetSM);
            Dataset dataset = this.datasetAccessService.getMandatory(loc);
            logger.info("Gathering additional dataset " + dataset.getFullName());
            boolean restrictColumns = dep.columnNames != null;
            if (restrictColumns) {
                logger.info("Keep only relevant columns : " + StringUtils.join(dep.columnNames, ", "));
            } else {
                logger.info("Keep only relevant columns : they are all required");
            }
            Schema schema = dataset.getSchema();
            if (restrictColumns) {
                schema = schema.filter(dep.columnNames);
            }
            String smartName = loc.getSmartName(recipeProjectKey);
            // The temp file is intentionally not closed here: it is handed over as a resource.
            AutoDelete tempDatafile = FlowJobUtils.getJobTempFile("shaker-hadoop", "stepdep_dataset_" + smartName, "dss1");
            try (SampleWriter sw = new SampleWriter((File)tempDatafile, true)) {
                StreamColumnFactory cf = new StreamColumnFactory();
                StreamRowFactory rf = new StreamRowFactory();
                ProcessorOutput sampleOutput = sw.writeFromProcessor(schema, (ColumnFactory)cf);
                UniversalSingleThreadPusher.push(authCtx, dataset, StreamableDatasetSelection.full(), sampleOutput, (ColumnFactory)cf, (RowFactory)rf);
            }
            logger.info("Serialized it to file " + tempDatafile + ", size = " + tempDatafile.length());
            this.allResourceFiles.put("dku.shaker.dependency." + smartName, (File)tempDatafile);
        }
    }

    /**
     * Writes the Python processors currently known to {@link PythonContribService} to a JSON
     * job temp file and records it under the resource id {@code "dku.shaker.contrib"}.
     *
     * @throws Exception if the temp file cannot be obtained or written
     */
    public void dumpAndGatherPythonProcessors() throws Exception {
        ArrayList<PythonContrib> contributedProcessors = new ArrayList<PythonContrib>(PythonContribService.getInstance().getContribs());
        logger.info("Serialized " + contributedProcessors.size() + " Python processor(s)");
        AutoDelete tempDatafile = FlowJobUtils.getJobTempFile("shaker-hadoop", "contributed_python_processors", "json");
        JSON.prettyToFile((Object)new SerializedPythonContribs(contributedProcessors), (File)tempDatafile);
        this.allResourceFiles.put("dku.shaker.contrib", (File)tempDatafile);
    }

    /**
     * Turns the gathered resources into the final file list and id -> file name mapping.
     *
     * <p>Directories are zipped first; the archive is written at the absolute path of a freshly
     * reserved job temp file with {@code ".zip"} appended, and the archive (not the directory)
     * is registered. Regular files are registered as they are.
     *
     * <p>File names must be unique across resources. As a consequence, calling this method a
     * second time on the same instance fails for regular files, since their names are already
     * registered.
     *
     * @throws IOException if zipping a directory fails
     * @throws RuntimeException if two resources end up with the same file name
     */
    public void computeMapping() throws IOException {
        for (Map.Entry<String, File> entry : this.allResourceFiles.entrySet()) {
            String resourceId = entry.getKey();
            File resource = entry.getValue();
            logger.info("Job required file " + resourceId + " -> " + resource);
            if (resource.isDirectory()) {
                logger.info("Adding a directory -> zip it");
                AutoDelete zipFileNoExt = FlowJobUtils.getJobTempFile("shaker-hadoop", "shadoop-zip-" + resourceId, "zip");
                File zipFile = new File(zipFileNoExt.getAbsolutePath() + ".zip");
                ZipUnzipDir.zipDirectory(resource, zipFile);
                resource = zipFile;
            }
            this.registerResource(resourceId, resource);
        }
    }

    /** Registers {@code file} under {@code resourceId}, rejecting a file name that is already in use. */
    private void registerResource(String resourceId, File file) {
        String filename = file.getName();
        if (this.resourceMapping.containsValue(filename)) {
            throw new RuntimeException("Each resource must be named differently (" + filename + ")");
        }
        this.resourceMapping.put(resourceId, filename);
        this.resourceFiles.add(file);
    }

    /**
     * Runs every gathering step in order, then computes the mapping.
     *
     * @param authCtx authentication context
     * @param projectKey key of the project the steps belong to
     * @param steps the script steps
     * @throws Exception if any of the gathering steps fails
     */
    public void gatherAndCompute(AuthCtx authCtx, String projectKey, List<ScriptStep> steps) throws Exception {
        this.gatherResources(steps);
        this.dumpAndGatherYarnPythonBins(steps, projectKey, authCtx);
        this.dumpAndGatherAdditionalDatasets(authCtx, projectKey, steps);
        this.dumpAndGatherPythonProcessors();
        this.computeMapping();
    }

    /** @return the live list of registered files (not a copy) */
    public List<File> getResourceFiles() {
        return this.resourceFiles;
    }

    /** @return the live resource id -> file name mapping (not a copy) */
    public Map<String, String> getResourceMapping() {
        return this.resourceMapping;
    }

    /**
     * Builds a resource id -> absolute local path mapping for every registered file whose name
     * appears in the resource mapping.
     *
     * @return a new map; modifying it does not affect this instance
     */
    public Map<String, String> getFullLocalPathResourceMapping() {
        // Invert id -> name so each registered file can be looked up by its name.
        Map<String, String> idsByFilename = Maps.newHashMap();
        for (Map.Entry<String, String> e : this.resourceMapping.entrySet()) {
            idsByFilename.put(e.getValue(), e.getKey());
        }
        Map<String, String> ret = Maps.newHashMap();
        for (File f : this.resourceFiles) {
            String filename = f.getName();
            if (idsByFilename.containsKey(filename)) {
                ret.put(idsByFilename.get(filename), f.getAbsolutePath());
            }
        }
        return ret;
    }

    /**
     * Resolves each mapped resource to an actual local file by matching the mapped file name
     * against the last {@code '/'}-separated chunk of the candidate paths; the first match wins.
     *
     * <p>A matched path ending in {@code ".zip"} is extracted into a sibling folder named
     * {@code <path>__<resourceId>}, and that folder is returned instead of the archive.
     *
     * @param resourceMapping resource id -> expected file name
     * @param localFiles candidate local paths; null entries are ignored
     * @return resource id -> resolved local file or extracted folder
     * @throws ZipException propagated from the archive extraction
     * @throws IOException propagated from the archive extraction
     * @throws Error if a mapped file name matches none of the candidates (type kept for
     *         backward compatibility with existing callers)
     */
    public static Map<String, File> getLocalResourceFiles(Map<String, String> resourceMapping, List<String> localFiles) throws ZipException, IOException {
        HashMap<String, File> ret = new HashMap<String, File>();
        for (Map.Entry<String, String> entry : resourceMapping.entrySet()) {
            String filename = entry.getValue();
            String resourceID = entry.getKey();
            String foundPath = null;
            logger.info("Looking for resource " + filename + " in localFiles");
            for (String currentFile : localFiles) {
                if (currentFile == null) {
                    continue;
                }
                if (StringUtils.equals(lastPathChunk(currentFile), filename)) {
                    foundPath = currentFile;
                    break;
                }
            }
            if (foundPath == null) {
                logger.error("Did not find resource " + filename + " in localFiles: " + localFiles);
                throw new Error("Did not find resource file " + filename);
            }
            logger.info("Distributed cache has file : " + foundPath);
            if (foundPath.endsWith(".zip")) {
                File newPath = new File(foundPath + "__" + resourceID);
                logger.info("unzip temporary file to " + newPath);
                ZipUnzipDir.extractFolder(new File(foundPath), newPath);
                logger.info("Reassign file " + resourceID + " to path " + newPath);
                ret.put(resourceID, newPath);
            } else {
                logger.info("Reassign file " + resourceID + " to path " + foundPath);
                ret.put(resourceID, new File(foundPath));
            }
        }
        return ret;
    }

    /**
     * Returns the last chunk of {@code path} when split on {@code '/'}.
     *
     * <p>{@link String#split(String)} drops trailing empty strings, so a path made only of
     * slashes yields an empty array; that case returns an empty string instead of failing.
     */
    private static String lastPathChunk(String path) {
        String[] chunks = path.split("/");
        return chunks.length == 0 ? "" : chunks[chunks.length - 1];
    }

    /**
     * Registers the Python processors stored under {@code "dku.shaker.contrib"} as builtin
     * processors. Does nothing when the resource map has no such entry.
     *
     * @param resourceMap resource id -> local file
     * @throws IOException if the processors file cannot be parsed
     */
    public static void loadContributedProcessors(Map<String, File> resourceMap) throws IOException {
        File contributedProcessorsFile = resourceMap.get("dku.shaker.contrib");
        if (contributedProcessorsFile == null) {
            return;
        }
        SerializedPythonContribs contributedProcessors = (SerializedPythonContribs)JSON.parseFile((File)contributedProcessorsFile, SerializedPythonContribs.class);
        for (PythonContrib contrib : contributedProcessors.contribs) {
            BaseProcessorsFactory.addBuiltinProcessor(new PythonContribProcessor(contrib, null).getMeta());
        }
    }
}

