/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.shaker.processors.transform;

import com.dataiku.dip.coremodel.SchemaColumn;
import com.dataiku.dip.datalayer.Column;
import com.dataiku.dip.datalayer.Processor;
import com.dataiku.dip.datalayer.Row;
import com.dataiku.dip.datalayer.SingleRowProcessor;
import com.dataiku.dip.datalineage.DatasetPairLineage;
import com.dataiku.dip.datalineage.RecipeLineage;
import com.dataiku.dip.datasets.Type;
import com.dataiku.dip.shaker.ProcessorWithRecordedReport;
import com.dataiku.dip.shaker.model.ProcessorScriptStep;
import com.dataiku.dip.shaker.model.StepParams;
import com.dataiku.dip.shaker.processors.Category;
import com.dataiku.dip.shaker.processors.ProcessorCapabilities;
import com.dataiku.dip.shaker.processors.ProcessorMeta;
import com.dataiku.dip.shaker.processors.ProcessorTag;
import com.dataiku.dip.shaker.server.ProcessorDesc;
import com.dataiku.dip.shaker.sql.ProcessorSQLTranslator;
import com.dataiku.dip.shaker.sql.SQLQueryWithSchema;
import com.dataiku.dip.shaker.text.Labelled;
import com.dataiku.dip.sql.SQLDialect;
import com.dataiku.dip.sql.queries.ExpressionBuilder;
import com.dataiku.dip.sql.queries.QueryUtils;
import com.dataiku.dip.util.ParamDesc;
import com.dataiku.dip.utils.DKUtils;
import com.dataiku.dip.utils.ErrorContext;
import com.dataiku.dip.utils.JSON;
import com.dataiku.dip.utils.Pair;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
import org.json.JSONArray;

public class ColumnSplitter
extends SingleRowProcessor
implements Processor,
ProcessorWithRecordedReport {
    public static final ProcessorMeta<ColumnSplitter, Parameter> META = new ProcessorMeta<ColumnSplitter, Parameter>(){

        /** Returns the fixed name of this processor: {@code "ColumnSplitter"}. */
        @Override
        public String getName() {
            return "ColumnSplitter";
        }

        /**
         * Returns {@code "split"}.
         * NOTE(review): presumably the key of the documentation page for this processor; confirm against ProcessorMeta.
         */
        @Override
        public String getDocPage() {
            return "split";
        }

        /** Returns {@link Category#TRANSFORMATION} as the category of this processor. */
        @Override
        public Category getCategory() {
            return Category.TRANSFORMATION;
        }

        /**
         * Returns the tags attached to this processor: {@link ProcessorTag#STRING} and {@link ProcessorTag#SPLIT}.
         *
         * @return a newly built, mutable set holding both tags
         */
        @Override
        public Set<ProcessorTag> getTags() {
            // Pass the tags as varargs so the element type is inferred as ProcessorTag; routing them
            // through an Object[] cast would yield a HashSet<Object>, which is not a Set<ProcessorTag>.
            return Sets.newHashSet(ProcessorTag.STRING, ProcessorTag.SPLIT);
        }

        /** Returns {@code Parameter.class}, the step-parameter type this meta is declared with. */
        @Override
        public Class<Parameter> stepParamClass() {
            return Parameter.class;
        }

        /**
         * Returns the long help text of the processor.
         *
         * <p>The text is looked up through {@code translate} under the key
         * {@code SHAKER.PROCESSOR.ColumnSplitter.HELP}; the English text below is passed as the third argument.
         *
         * @param language language passed on to {@code translate}
         * @return the help text
         */
        @Override
        public String getHelp(String language) {
            // One source line per rendered line; the concatenation is a compile-time constant.
            String defaultHelp =
                    "Split a column into several columns on each occurrence of the delimiter. The output columns are numbered: The first chunk will be in prefix_0, the second in prefix_1, and so on.\n"
                    + "# Examples\n"
                    + "* Split `col=a/b/c` using `/` as the delimiter and `chunk` as the output column prefix.\n"
                    // Chunks are numbered consecutively from 0, so the third chunk is chunk_2.
                    + "Output: `chunk_0=a`, `chunk_1=b`, `chunk_2=c`\n"
                    + "* Split `col=a/b/c` using `/` as the delimiter, `chunk` as the output column prefix, and keep 2 columns from the beginning.\n"
                    + "Output: `chunk_0=a`, `chunk_1=b`\n"
                    + "\n"
                    + "# Options\n"
                    + "**Delimiter**\n"
                    + "Separates values from each input column within the output.\n"
                    + "**Output columns prefix**\n"
                    + "Add a prefix to identify the output columns.\n"
                    + "**Output as**\n"
                    + "Output the result(s) of the split as separate columns or as an array (`A-B` \u2192 `[\"A\",\u201dB\u201d]`).\n"
                    + "**Truncate**\n"
                    + "Limit the number of output columns and keep only the first N columns or the N last columns.\n"
                    + "**Keep empty chunks**\n"
                    + "Preserve empty chunks between consecutive delimiters. (`App`, delimiter `p` \u2192 `[\"A\", \u201c\u201d, \u201c\u201d]`)";
            return this.translate(language, "SHAKER.PROCESSOR.ColumnSplitter.HELP", defaultHelp);
        }

        @Override
        public ProcessorDesc describe(String language) {
            return new ProcessorDesc(this.getName(), this.translate(language, "SHAKER.PROCESSOR.ColumnSplitter.DESCRIPTION", 1.actionVerb("Split") + " column"), "Divides cells wherever this substring appears, creating a newcolumn for each division.", false).withMNEColParam("inCol", this.translate(language, "SHAKER.PROCESSOR.ColumnSplitter.DESCRIPTION.IN_COL", "Column")).withMNESParam("separator", this.translate(language, "SHAKER.PROCESSOR.ColumnSplitter.DESCRIPTION.SEPARATOR", "Separator string")).withMNESParam("outColPrefix", this.translate(language, "SHAKER.PROCESSOR.ColumnSplitter.DESCRIPTION.OUT_COL_PREFIX", "Prefix for names of generated output columns")).withParam(ParamDesc.advancedSelect("target", this.translate(language, "SHAKER.PROCESSOR.ColumnSplitter.DESCRIPTION.TARGET", "Target"), "How to store output", TargetType.class).withDefaultValue(TargetType.COLUMNS)).withBool("limitOutput", this.translate(language, "SHAKER.PROCESSOR.ColumnSplitter.DESCRIPTION.LIMIT_OUTPUT", "Limit output")).withParam("limit", "int", false, false, this.translate(language, "SHAKER.PROCESSOR.ColumnSplitter.DESCRIPTION.LIMIT", "Keep at most")).withParam("startFrom", "string", false, false, this.translate(language, "SHAKER.PROCESSOR.ColumnSplitter.DESCRIPTION.START_FROM", "Starting from")).withBool("keepEmptyChunks", this.translate(language, "SHAKER.PROCESSOR.ColumnSplitter.DESCRIPTION.KEEP_EMPTY_CHUNKS", "Keep empty chunks"));
        }

        /**
         * Creates a new {@link ColumnSplitter} for the given step parameters.
         *
         * @param parameter parameters handed to the processor's constructor
         * @return the new processor instance
         */
        @Override
        public ColumnSplitter build(Parameter parameter) {
            return new ColumnSplitter(parameter);
        }

        /**
         * Produces the self-report for a step: the result of {@code JSON.deepCopyExcept} applied to the
         * parameters with the {@code inCol} and {@code outColPrefix} fields listed as exclusions.
         *
         * @param parameter step parameters to report on
         * @return whatever {@code JSON.deepCopyExcept} returns for these arguments
         */
        @Override
        public Object selfReport(Parameter parameter) {
            String[] excludedFields = {"inCol", "outColPrefix"};
            Object source = parameter;
            return JSON.deepCopyExcept(source, excludedFields);
        }

        /**
         * Reports which execution engines can run this step.
         *
         * <ul>
         *   <li>Target {@code JSON}: native Spark is possible, SQL is not.</li>
         *   <li>Target {@code COLUMNS} without a recorded {@code createdColumnCount}: neither engine can be used.</li>
         *   <li>Target {@code COLUMNS} with a recorded count: native Spark is possible; SQL is possible unless the
         *       output is limited starting from the end, or the dialect's regex support is {@code NONE} or
         *       {@code BASIC}.</li>
         *   <li>Any other target value (including {@code null}): an empty summary is returned.</li>
         * </ul>
         *
         * @param params  step parameters; must be a {@link Parameter}
         * @param report  report recorded by a previous run, may be {@code null}
         * @param dialect SQL dialect to check regex support against, may be {@code null}
         * @return the capabilities summary
         */
        @Override
        public ProcessorMeta.ProcessorCapabilitiesSummary getCapabilities(StepParams params, ProcessorWithRecordedReport.ProcessorRecordedReport report, SQLDialect dialect) {
            Parameter p = (Parameter)params;
            ProcessorMeta.ProcessorCapabilitiesSummary ret = new ProcessorMeta.ProcessorCapabilitiesSummary();
            if (p.target == TargetType.JSON) {
                ret.withCan(ProcessorCapabilities.NATIVE_SPARK_IMPL);
                ret.withCould(ProcessorCapabilities.SQL_TRANSLATABLE, "Cannot produce array objects with SQL engine");
                return ret;
            }
            if (p.target != TargetType.COLUMNS) {
                return ret;
            }

            // Same null-safe lookup as getSQLTranslator: the report wrapper and its payload may both be absent.
            boolean columnCountRecorded = report != null && report.report != null && report.report.has("createdColumnCount");
            if (!columnCountRecorded) {
                ret.withCould(ProcessorCapabilities.SQL_TRANSLATABLE, "Cannot use SQL engine: createdColumnCount was not recorded");
                ret.withCould(ProcessorCapabilities.NATIVE_SPARK_IMPL, "Cannot use optimized engine: createdColumnCount was not recorded");
                return ret;
            }

            ret.withCan(ProcessorCapabilities.NATIVE_SPARK_IMPL);
            // Same test as processRow uses to decide that chunks are taken from the end.
            boolean limitedFromEnd = p.limitOutput && "end".equals(p.startFrom);
            if (limitedFromEnd) {
                ret.withCould(ProcessorCapabilities.SQL_TRANSLATABLE, "Cannot use SQL engine: Cannot select limited number of matches from end in SQL");
            } else if (dialect != null && dialect.regexSupport() == SQLDialect.RegexSupport.NONE) {
                ret.withCould(ProcessorCapabilities.SQL_TRANSLATABLE, "Cannot use SQL engine: database doesn't support regular expressions");
            } else if (dialect != null && dialect.regexSupport() == SQLDialect.RegexSupport.BASIC) {
                ret.withCould(ProcessorCapabilities.SQL_TRANSLATABLE, "Cannot use SQL engine: database doesn't support reluctant quantifiers in regular expressions");
            } else {
                ret.withCan(ProcessorCapabilities.SQL_TRANSLATABLE);
            }
            return ret;
        }

        /**
         * Returns the fully qualified class name {@code com.dataiku.dip.shaker.processors.transform.ColumnSplitterNS}.
         * NOTE(review): presumably the native Spark implementation of this processor; that class is not visible here.
         */
        @Override
        public String getNativeSparkClassname() {
            return "com.dataiku.dip.shaker.processors.transform.ColumnSplitterNS";
        }

        /**
         * Creates the SQL translator for a step.
         *
         * <p>The number of output columns is read from the {@code createdColumnCount} entry of the recorded
         * report; when the report, its payload or that entry is missing, 0 is used.
         *
         * @param parameter step parameters; must be a {@link Parameter}
         * @param report    report recorded by a previous run, may be {@code null}
         * @return a translator bound to the parameters and the column count
         * @throws IOException declared by the overridden method; not thrown by this implementation's own code
         */
        @Override
        public ProcessorSQLTranslator getSQLTranslator(StepParams parameter, ProcessorWithRecordedReport.ProcessorRecordedReport report) throws IOException {
            int columnCount = 0;
            if (report != null && report.report != null && report.report.has("createdColumnCount")) {
                columnCount = report.report.get("createdColumnCount").getAsInt();
            }
            Parameter splitterParams = (Parameter)parameter;
            return new SQLTranslator(splitterParams, columnCount);
        }

        /**
         * Derives the recipe lineage after this step from the lineage before it.
         *
         * <p>Every dataset pair of the previous lineage is copied, then extended:
         * <ul>
         *   <li>when the target is not {@code COLUMNS} (the case in which {@code processRow} writes a single array
         *       column), a relation from {@code inCol} to the column named {@code outColPrefix} is added;</li>
         *   <li>otherwise, when the design-time report holds {@code createdColumnCount}, a relation from
         *       {@code inCol} to each of {@code outColPrefix + 0 .. outColPrefix + (count - 1)} is added;</li>
         *   <li>otherwise the returned lineage is flagged as uncertain.</li>
         * </ul>
         *
         * @param pss                   the script step; its params must be a {@link Parameter}
         * @param previousRecipeLineage lineage before this step
         * @return a new lineage object; the previous one is only read
         * @throws IllegalArgumentException if the step params are {@code null} or not a {@link Parameter}
         */
        @Override
        public RecipeLineage getUpdatedRecipeLineage(ProcessorScriptStep pss, RecipeLineage previousRecipeLineage) {
            if (!(pss.params instanceof Parameter)) {
                // instanceof is false for null too, so guard before asking for the class name.
                String actualType = pss.params == null ? "null" : pss.params.getClass().getSimpleName();
                throw new IllegalArgumentException("Unsupported param type: " + actualType);
            }
            Parameter columnSplitterParams = (Parameter)pss.params;
            // Identity comparison is null-safe and mirrors the branch taken by processRow.
            boolean singleArrayColumn = columnSplitterParams.target != TargetType.COLUMNS;
            boolean columnCountKnown = pss.designTimeReport != null && pss.designTimeReport.report != null && pss.designTimeReport.report.has("createdColumnCount");
            // Read the count once, and only when the branch that needs it can be taken.
            int createdColumnCount = !singleArrayColumn && columnCountKnown ? pss.designTimeReport.report.get("createdColumnCount").getAsInt() : 0;

            RecipeLineage updatedRecipeLineage = new RecipeLineage();
            previousRecipeLineage.getDatasetPairLineages().forEach((datasetPair, previousDatasetPairLineage) -> {
                DatasetPairLineage updatedDatasetPairLineage = new DatasetPairLineage((DatasetPairLineage)previousDatasetPairLineage);
                if (singleArrayColumn) {
                    updatedDatasetPairLineage.addFactorizedColumnRelations(columnSplitterParams.inCol, columnSplitterParams.outColPrefix);
                } else if (columnCountKnown) {
                    for (int i = 0; i < createdColumnCount; ++i) {
                        String columnName = columnSplitterParams.outColPrefix + i;
                        updatedDatasetPairLineage.addFactorizedColumnRelations(columnSplitterParams.inCol, columnName);
                    }
                } else {
                    updatedRecipeLineage.setUncertain(true);
                }
                updatedRecipeLineage.setDatasetPairLineage((Pair<String, String>)datasetPair, updatedDatasetPairLineage);
            });
            return updatedRecipeLineage;
        }
    };
    private final Parameter param;
    private Column in;
    private int actuallyCreatedColumnsCount = 0;

    /**
     * Creates a splitter driven by the given parameters.
     * The parameter object is stored as-is; it is neither copied nor validated here.
     *
     * @param parameter step parameters used by {@code init} and {@code processRow}
     */
    public ColumnSplitter(Parameter parameter) {
        this.param = parameter;
    }

    /**
     * Resolves the input column: asks the column factory for the column named {@code param.inCol} with the
     * {@code INPUT_COLUMN} role and keeps it for {@code processRow}.
     */
    public void init() {
        Column inputColumn = this.getColumnFactory().column(this.param.inCol, Processor.ProcessorRole.INPUT_COLUMN);
        this.in = inputColumn;
    }

    /**
     * Splits the input cell of {@code row} on the configured separator and writes the chunks back to the row.
     *
     * <p>Rows whose input cell is {@code null} are left untouched. Otherwise the cell is split with
     * {@code StringUtils.splitByWholeSeparatorPreserveAllTokens}, the chunks to keep are chosen by
     * {@link #selectChunks(String[])}, and then:
     * <ul>
     *   <li>target {@code COLUMNS}: the k-th kept chunk is written to the column {@code outColPrefix + k}, each
     *       column being requested after the previously written one (the first one after the input column); the
     *       largest number of columns written for any row so far is remembered for the recorded report;</li>
     *   <li>any other target: all kept chunks are written as the string form of one JSON array into the column
     *       named {@code outColPrefix}, requested after the input column.</li>
     * </ul>
     *
     * @param row the row to read from and write to
     */
    public void processRow(Row row) {
        String cellValue = row.get(this.in);
        if (cellValue == null) {
            return;
        }
        String[] chunks = StringUtils.splitByWholeSeparatorPreserveAllTokens(cellValue, this.param.separator);
        ArrayList<String> kept = this.selectChunks(chunks);

        if (this.param.target == TargetType.COLUMNS) {
            Column lastCreated = this.in;
            int index = 0;
            for (String chunk : kept) {
                lastCreated = this.getColumnFactory().columnAfter(lastCreated.getName(), this.param.outColPrefix + index, Processor.ProcessorRole.OUTPUT_COLUMN);
                row.put(lastCreated, chunk);
                ++index;
            }
            this.actuallyCreatedColumnsCount = Math.max(this.actuallyCreatedColumnsCount, kept.size());
            return;
        }

        JSONArray target = new JSONArray();
        for (String chunk : kept) {
            target.put((Object)chunk);
        }
        row.put(this.getColumnFactory().columnAfter(this.in.getName(), this.param.outColPrefix, Processor.ProcessorRole.OUTPUT_COLUMN), target.toString());
    }

    /**
     * Chooses, in output order, the chunks that {@code processRow} must emit.
     *
     * <p>When output is limited and {@code startFrom} is {@code "end"}, chunks are visited last-to-first, so the
     * first returned element is the last chunk of the cell; otherwise they are visited first-to-last. Empty
     * chunks are skipped unless {@code keepEmptyChunks} is set. When output is limited, selection stops as soon
     * as exactly {@code limit} chunks have been kept; a limit that is never reached exactly (zero or negative)
     * therefore does not truncate anything.
     *
     * @param chunks all chunks of the cell, in cell order
     * @return the kept chunks, in the order they are to be written
     */
    private ArrayList<String> selectChunks(String[] chunks) {
        boolean fromEnd = this.param.limitOutput && "end".equals(this.param.startFrom);
        ArrayList<String> kept = new ArrayList<>();
        for (int visited = 0; visited < chunks.length; ++visited) {
            String chunk = fromEnd ? chunks[chunks.length - 1 - visited] : chunks[visited];
            if (!this.param.keepEmptyChunks && chunk.isEmpty()) {
                continue;
            }
            kept.add(chunk);
            if (this.param.limitOutput && kept.size() == this.param.limit) {
                break;
            }
        }
        return kept;
    }

    /** Does nothing: this processor performs no work in {@code postProcess}. */
    public void postProcess() {
    }

    /**
     * Builds the recorded report of this processor.
     *
     * @return a new report whose {@code createdColumnCount} property holds the largest number of output columns
     *         written for a single row by {@code processRow} so far (0 if none)
     */
    @Override
    public ProcessorWithRecordedReport.ProcessorRecordedReport getRecordedReport() {
        ProcessorWithRecordedReport.ProcessorRecordedReport recorded = new ProcessorWithRecordedReport.ProcessorRecordedReport();
        Number createdColumns = this.actuallyCreatedColumnsCount;
        recorded.report.addProperty("createdColumnCount", createdColumns);
        return recorded;
    }

    /**
     * Step parameters of the column splitter.
     */
    public static class Parameter
    implements StepParams {
        private static final long serialVersionUID = -1L;
        /** Name of the column whose cells are split. */
        public String inCol;
        /** Separator string the cells are split on. */
        public String separator;
        /** Prefix of the generated column names; used unchanged as the column name for the array output. */
        public String outColPrefix;
        /** When false, empty chunks are dropped from the output. */
        public boolean keepEmptyChunks;
        /** Output form; defaults to multiple columns. */
        public TargetType target = TargetType.COLUMNS;
        /** Whether the number of emitted chunks is capped by {@link #limit}. */
        boolean limitOutput = false;
        /** Maximum number of emitted chunks when {@link #limitOutput} is set. */
        int limit = 0;
        /** {@code "beginning"}, {@code "end"} or {@code null}: side from which chunks are kept when limiting. */
        String startFrom = null;

        /**
         * Checks the consistency of the limiting options.
         *
         * <p>Throws the exception built by {@code ErrorContext.iae} when output limiting is enabled with a
         * non-positive {@code limit} or without {@code startFrom}, or when {@code startFrom} is set to anything
         * other than {@code "beginning"} or {@code "end"}.
         */
        public void validate() {
            if (this.limitOutput) {
                if (this.limit <= 0) {
                    throw ErrorContext.iae("'limit' must be strictly positive");
                }
                if (this.startFrom == null) {
                    throw ErrorContext.iae("'startFrom'  is mandatory when 'limitOutput' is enabled");
                }
            }
            boolean startFromAccepted = this.startFrom == null || "beginning".equals(this.startFrom) || "end".equals(this.startFrom);
            if (!startFromAccepted) {
                throw ErrorContext.iae("'startFrom' must be null, 'beginning' or 'end'");
            }
        }
    }

    /**
     * Form in which the chunks of a split are stored: as one JSON array cell or as several columns.
     */
    public static enum TargetType implements Labelled
    {
        JSON("Array (JSON)"),
        COLUMNS("Multiple columns");

        /** Label returned by {@link #getLabel()}. */
        private final String label;

        private TargetType(String label) {
            this.label = label;
        }

        @Override
        public String getLabel() {
            return this.label;
        }
    }

    /**
     * Translates a column-splitter step (multiple-columns output) into SQL select expressions.
     *
     * <p>The translator adds {@code createdColumnCount} string columns named {@code outColPrefix + i} after the
     * input column. It does not read {@code limitOutput}, {@code limit}, {@code startFrom} or {@code target}.
     */
    private static class SQLTranslator
    implements ProcessorSQLTranslator {
        /**
         * Placeholder every separator occurrence is rewritten to before splitting. The regular expressions in
         * {@code translate} embed the same text literally.
         */
        private static final String SEPARATOR_TOKEN = "__dku_sep__";

        private final Parameter parameter;
        private final int createdColumnCount;

        /**
         * @param parameter          step parameters (input column, separator, prefix, empty-chunk handling)
         * @param createdColumnCount number of output columns to generate
         */
        private SQLTranslator(Parameter parameter, int createdColumnCount) {
            this.parameter = parameter;
            this.createdColumnCount = createdColumnCount;
        }

        /**
         * Appends the output columns of the split to the query.
         *
         * <p>If the input column is created or modified by the current query, the query is first wrapped in a
         * subquery. The separator (regex-escaped) is replaced by a placeholder token; when empty chunks are not
         * kept, runs of separators collapse into one token and a leading or trailing token is removed. Each output
         * column is then selected with the first available strategy: the dialect's SPLIT + GET operators
         * (index {@code i}), its SPLIT_PART operator (index {@code i + 1}), or a regular-expression replacement
         * that keeps only the i-th piece.
         *
         * @param chain the query built so far
         * @return the query (possibly a new subquery wrapper) with the output columns added
         */
        @Override
        public SQLQueryWithSchema translate(SQLQueryWithSchema chain) {
            boolean splitMode = chain.getDialect().getOperator(QueryUtils.OperatorType.SPLIT) != null && chain.getDialect().getOperator(QueryUtils.OperatorType.GET) != null;
            boolean splitPartMode = chain.getDialect().getOperator(QueryUtils.OperatorType.SPLIT_PART) != null;
            ArrayList<String> affectedColumns = Lists.newArrayList(this.parameter.inCol);
            if (chain.isAnyCreatedOrModifiedByCurrentQuery(affectedColumns)) {
                chain = chain.makeSubquery();
            }
            String lastColumn = this.parameter.inCol;
            SchemaColumn inputSchemaColumn = chain.getMandatoryCurrentColumn(this.parameter.inCol);
            String separatorPatternBit = DKUtils.escapeRegex(this.parameter.separator);
            ExpressionBuilder simplified = chain.col(this.parameter.inCol);
            if (this.parameter.keepEmptyChunks) {
                simplified = simplified.regexpReplace(separatorPatternBit, SEPARATOR_TOKEN);
            } else {
                // Collapse consecutive separators, then drop a token left at either end of the value.
                simplified = simplified.regexpReplace(String.format("(%s)+", separatorPatternBit), SEPARATOR_TOKEN);
                simplified = simplified.regexpReplace("(^__dku_sep__)|(__dku_sep__$)", "");
            }
            ExpressionBuilder.ExpressionBuilderFactory ebf = new ExpressionBuilder.ExpressionBuilderFactory();
            // Every output column gets the input column's length, or the dialect default when none is set;
            // the value does not depend on the column index, so it is computed once.
            int maxLength = inputSchemaColumn.getMaxLength() > 0 ? inputSchemaColumn.getMaxLength() : chain.getDialect().getDefaultVarcharLen();
            for (int i = 0; i < this.createdColumnCount; ++i) {
                String columnName = this.parameter.outColPrefix + i;
                chain.addColumnAfter(lastColumn, new SchemaColumn(columnName, Type.STRING, maxLength));
                if (splitMode) {
                    chain.select(simplified.splitString(ebf.cst(SEPARATOR_TOKEN)).getInArray(ebf.cst(i)).castToString(maxLength), columnName);
                } else if (splitPartMode) {
                    chain.select(simplified.splitPartString(ebf.cst(SEPARATOR_TOKEN), i + 1).castToString(maxLength), columnName);
                } else {
                    String replacement;
                    String regex;
                    if (i == 0) {
                        // Group 3: text before the first token; group 5: the whole value when it holds no token.
                        regex = "((^(.*?)(__dku_sep__.*)$)|(^.*$))()";
                        replacement = chain.getDialect().captureGroup(3) + chain.getDialect().captureGroup(5);
                    } else {
                        // Skip i "piece + token" prefixes; group 6: next piece when more tokens follow,
                        // group 8: next piece when it is the last one.
                        regex = String.format("((^(.*?__dku_sep__){%d}(((.*?)(__dku_sep__.*))|(.*))$)|(^.*$))()", i);
                        replacement = chain.getDialect().captureGroup(6) + chain.getDialect().captureGroup(8);
                    }
                    chain.select(simplified.regexpReplace(regex, replacement).castToString(maxLength), columnName);
                }
                lastColumn = columnName;
            }
            return chain;
        }
    }
}

