/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.shaker.processors.reshaping;

import com.dataiku.dip.coremodel.SchemaColumn;
import com.dataiku.dip.datalayer.Column;
import com.dataiku.dip.datalayer.Processor;
import com.dataiku.dip.datalayer.Row;
import com.dataiku.dip.datalineage.DatasetPairLineage;
import com.dataiku.dip.datalineage.RecipeLineage;
import com.dataiku.dip.datasets.Type;
import com.dataiku.dip.shaker.ProcessorWithRecordedReport;
import com.dataiku.dip.shaker.model.ProcessorScriptStep;
import com.dataiku.dip.shaker.model.StepParams;
import com.dataiku.dip.shaker.processors.Category;
import com.dataiku.dip.shaker.processors.ProcessorCapabilities;
import com.dataiku.dip.shaker.processors.ProcessorMeta;
import com.dataiku.dip.shaker.processors.ProcessorTag;
import com.dataiku.dip.shaker.processors.reshaping.UnfoldProcessor;
import com.dataiku.dip.shaker.server.ProcessorDesc;
import com.dataiku.dip.shaker.sql.ProcessorSQLTranslator;
import com.dataiku.dip.shaker.sql.SQLQueryWithSchema;
import com.dataiku.dip.sql.SQLDialect;
import com.dataiku.dip.sql.queries.ExpressionBuilder;
import com.dataiku.dip.utils.JSON;
import com.dataiku.dip.utils.Pair;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonPrimitive;
import com.google.gson.reflect.TypeToken;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.mutable.MutableInt;

public class SplitUnfold
extends UnfoldProcessor
implements Processor,
ProcessorWithRecordedReport {
    private final Parameter parameter;
    public static final ProcessorMeta<SplitUnfold, Parameter> META = new ProcessorMeta<SplitUnfold, Parameter>(){

        @Override
        public String getName() {
            return "SplitUnfold";
        }

        @Override
        public String getDocPage() {
            return "split-unfold";
        }

        @Override
        public Category getCategory() {
            return Category.RESHAPING;
        }

        @Override
        public Set<ProcessorTag> getTags() {
            return Sets.newHashSet((Object[])new ProcessorTag[]{ProcessorTag.RESHAPING});
        }

        @Override
        public Class<Parameter> stepParamClass() {
            return Parameter.class;
        }

        @Override
        public String getHelp(String language) {
            return this.translate(language, "SHAKER.PROCESSOR.SplitUnfold.HELP", "This processor splits the values of a column based on a separator and transforms them into several binary columns. Also called 'dummification'.\n\nYou can prefix new columns by filling the \"Prefix\" option.\n\nYou can choose the maximum number of columns to create with the \"Max nb. columns to create\" option.");
        }

        @Override
        public ProcessorDesc describe(String language) {
            return UnfoldProcessor.addColumnLimitationsDesc(language, new ProcessorDesc(this.getName(), this.translate(language, "SHAKER.PROCESSOR.SplitUnfold.DESCRIPTION", 1.actionVerb("Split") + " and " + 1.actionVerb("unfold")), false).withMNEColParam("column", this.translate(language, "SHAKER.PROCESSORS.DESCRIPTION.COLUMN", "Column")).withParam("prefix", "String", false, true, this.translate(language, "SHAKER.PROCESSOR.SplitUnfold.DESCRIPTION.PREFIX", "Prefix (empty for no prefix)")).withMNESParam("separator", this.translate(language, "SHAKER.PROCESSOR.SplitUnfold.DESCRIPTION", "Separator")).withBool("keepEmptyChunks", this.translate(language, "SHAKER.PROCESSOR.SplitUnfold.DESCRIPTION.KEEP_EMPTY_CHUNKS", "Keep empty chunks")));
        }

        @Override
        public Object selfReport(Parameter p) {
            return JSON.deepCopyExcept((Object)p, (String[])new String[]{"column", "prefix"});
        }

        @Override
        public SplitUnfold build(Parameter parameter) throws Exception {
            return new SplitUnfold(parameter);
        }

        @Override
        public ProcessorMeta.ProcessorCapabilitiesSummary getCapabilities(StepParams params, ProcessorWithRecordedReport.ProcessorRecordedReport report, SQLDialect dialect) {
            Parameter p = (Parameter)params;
            ProcessorMeta.ProcessorCapabilitiesSummary ret = new ProcessorMeta.ProcessorCapabilitiesSummary();
            if (report != null && report.report.has("unfoldedValues")) {
                if (!p.keepEmptyChunks) {
                    ret.withCan(ProcessorCapabilities.SQL_TRANSLATABLE);
                } else {
                    ret.withCould(ProcessorCapabilities.SQL_TRANSLATABLE, "Cannot use SQL engine: cannot keep empty chunks to SQL");
                }
                ret.withCan(ProcessorCapabilities.NATIVE_SPARK_IMPL);
            } else {
                ret.withCould(ProcessorCapabilities.SQL_TRANSLATABLE, "Cannot use SQL engine: no unfoldedValues were recorded");
                ret.withCould(ProcessorCapabilities.NATIVE_SPARK_IMPL, "Cannot use optimized engine: no unfoldedValues were recorded");
            }
            return ret;
        }

        @Override
        public ProcessorSQLTranslator getSQLTranslator(StepParams parameter, ProcessorWithRecordedReport.ProcessorRecordedReport report) throws IOException {
            List unfoldedValues = (List)JSON.parse((JsonElement)report.report.get("unfoldedValues"), (TypeToken)new TypeToken<List<String>>(){});
            return new SQLTranslator((Parameter)parameter, unfoldedValues);
        }

        @Override
        public String getNativeSparkClassname() {
            return "com.dataiku.dip.shaker.processors.reshaping.SplitUnfoldNS";
        }

        @Override
        public RecipeLineage getUpdatedRecipeLineage(ProcessorScriptStep pss, RecipeLineage previousRecipeLineage) {
            if (!(pss.params instanceof Parameter)) {
                throw new IllegalArgumentException("Unsupported param type: " + pss.params.getClass().getSimpleName());
            }
            Parameter splitUnfoldParams = (Parameter)pss.params;
            RecipeLineage updatedRecipeLineage = new RecipeLineage();
            previousRecipeLineage.getDatasetPairLineages().forEach((datasetPair, previousDatasetPairLineage) -> {
                DatasetPairLineage updatedDatasetPairLineage = new DatasetPairLineage((DatasetPairLineage)previousDatasetPairLineage);
                if (pss.designTimeReport != null && pss.designTimeReport.report != null && pss.designTimeReport.report.has("unfoldedValues")) {
                    JsonArray producedColumns = (JsonArray)pss.designTimeReport.report.get("unfoldedValues");
                    for (JsonElement outputColumn : producedColumns) {
                        updatedDatasetPairLineage.addFactorizedColumnRelations(splitUnfoldParams.column, StringUtils.defaultIfBlank((String)splitUnfoldParams.prefix, (String)"") + outputColumn.getAsString());
                    }
                } else {
                    updatedRecipeLineage.setUncertain(true);
                }
                updatedRecipeLineage.setDatasetPairLineage((Pair<String, String>)datasetPair, updatedDatasetPairLineage);
            });
            return updatedRecipeLineage;
        }
    };
    private Column unfoldCD;
    private Map<String, MutableInt> rowCache = new HashMap<String, MutableInt>();
    private List<String> actuallyCreatedColumns = new ArrayList<String>();
    private Set<String> actuallyCreatedColumnsSet = new HashSet<String>();

    public SplitUnfold(Parameter parameter) {
        this.parameter = parameter;
    }

    public void init() {
        this.unfoldCD = this.getColumnFactory().column(this.parameter.column, Processor.ProcessorRole.INPUT_COLUMN);
    }

    public void processRow(Row row) throws Exception {
        String v = row.get(this.unfoldCD);
        if (v == null || v.isEmpty()) {
            return;
        }
        String[] chunks = StringUtils.splitByWholeSeparatorPreserveAllTokens((String)v, (String)this.parameter.separator);
        this.rowCache.clear();
        for (String chunk : chunks) {
            if (!this.parameter.keepEmptyChunks && chunk.length() <= 0) continue;
            MutableInt mi = this.rowCache.get(chunk);
            if (mi == null) {
                mi = new MutableInt(0);
                this.rowCache.put(chunk, mi);
            }
            mi.increment();
        }
        this.enrichCreatedColumns(this.rowCache.keySet(), (UnfoldProcessor.UnfoldParameter)this.parameter);
        for (Map.Entry entry : this.rowCache.entrySet()) {
            if (this.parameter.overflowAction != UnfoldProcessor.OverflowAction.KEEP && !this.createdColumns.contains(entry.getKey())) continue;
            String columnName = this.parameter.prefix == null ? (String)entry.getKey() : this.parameter.prefix + (String)entry.getKey();
            Column c2 = this.getColumnFactory().columnAfter(this.parameter.column, columnName, Processor.ProcessorRole.OUTPUT_COLUMN);
            row.put(c2, ((MutableInt)entry.getValue()).intValue());
            if (!this.actuallyCreatedColumnsSet.add(columnName)) continue;
            this.actuallyCreatedColumns.add(columnName);
        }
    }

    public void postProcess() throws Exception {
    }

    @Override
    public ProcessorWithRecordedReport.ProcessorRecordedReport getRecordedReport() {
        ProcessorWithRecordedReport.ProcessorRecordedReport ret = new ProcessorWithRecordedReport.ProcessorRecordedReport();
        JsonArray arr = new JsonArray();
        for (String acc : this.actuallyCreatedColumns) {
            if (this.parameter.prefix != null) {
                acc = acc.substring(this.parameter.prefix.length());
            }
            arr.add((JsonElement)new JsonPrimitive(acc));
        }
        ret.report.add("unfoldedValues", (JsonElement)arr);
        return ret;
    }

    public static class Parameter
    extends UnfoldProcessor.UnfoldParameter
    implements StepParams {
        private static final long serialVersionUID = -1L;
        public String column;
        String separator;
        public String prefix;
        boolean keepEmptyChunks;

        public void validate() {
        }
    }

    private static class SQLTranslator
    implements ProcessorSQLTranslator {
        private final Parameter parameter;
        private final List<String> unfoldedValues;

        private SQLTranslator(Parameter parameter, List<String> unfoldedValues) {
            this.parameter = parameter;
            this.unfoldedValues = unfoldedValues;
        }

        @Override
        public SQLQueryWithSchema translate(SQLQueryWithSchema chain) {
            ArrayList affectedColumns = Lists.newArrayList((Object[])new String[]{this.parameter.column});
            if (chain.isAnyCreatedOrModifiedByCurrentQuery(affectedColumns)) {
                chain = chain.makeSubquery();
            }
            ExpressionBuilder.ExpressionBuilderFactory ebf = new ExpressionBuilder.ExpressionBuilderFactory();
            ExpressionBuilder expanded = ebf.cst(this.parameter.separator).concat(chain.col(this.parameter.column).replace(this.parameter.separator, this.parameter.separator + this.parameter.separator), ebf.cst(this.parameter.separator + "_"));
            ExpressionBuilder lengthBefore = expanded.length();
            for (String unfoldedValue : this.unfoldedValues) {
                String unfoldedColumn = StringUtils.defaultIfBlank((String)this.parameter.prefix, (String)"") + unfoldedValue;
                String bracketed = this.parameter.separator + unfoldedValue + this.parameter.separator;
                ExpressionBuilder shortened = expanded.replace(bracketed, "");
                ExpressionBuilder occurrences = lengthBefore.minus(shortened.length()).coalesce(ebf.cst(0)).div(ebf.cst(bracketed.length())).castToBigint();
                chain.addColumnAfter(this.parameter.column, new SchemaColumn(unfoldedColumn, Type.BIGINT));
                chain.select(occurrences, unfoldedColumn);
            }
            return chain;
        }
    }
}

