/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.shaker.processors.transform;

import com.dataiku.dip.connections.AbstractSQLConnection;
import com.dataiku.dip.datalayer.Column;
import com.dataiku.dip.datalayer.Processor;
import com.dataiku.dip.datalayer.Row;
import com.dataiku.dip.datalineage.RecipeLineage;
import com.dataiku.dip.shaker.ProcessorWithRecordedReport;
import com.dataiku.dip.shaker.model.ProcessorScriptStep;
import com.dataiku.dip.shaker.model.StepParams;
import com.dataiku.dip.shaker.processors.AppliesToProcessor;
import com.dataiku.dip.shaker.processors.Category;
import com.dataiku.dip.shaker.processors.PrepareSnowflakeUDFUtils;
import com.dataiku.dip.shaker.processors.ProcessorCapabilities;
import com.dataiku.dip.shaker.processors.ProcessorMeta;
import com.dataiku.dip.shaker.processors.ProcessorTag;
import com.dataiku.dip.shaker.processors.transform.StringTransformation;
import com.dataiku.dip.shaker.processors.transform.StringTransformationSettings;
import com.dataiku.dip.shaker.server.ProcessorDesc;
import com.dataiku.dip.shaker.sql.ProcessorSQLTranslator;
import com.dataiku.dip.shaker.sql.SQLQueryWithSchema;
import com.dataiku.dip.shaker.sql.SnowflakeUDFProcessorTranslator;
import com.dataiku.dip.sql.SQLDialect;
import com.dataiku.dip.sql.queries.ExpressionBuilder;
import com.dataiku.dip.util.SecretKeyGenerator;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import java.io.IOException;
import java.util.List;
import java.util.Set;

public class StringTransformer {
    public static final ProcessorMeta<StreamImpl, Parameter> META = new AppliesToProcessor.AppliesToProcessorMeta<StreamImpl, Parameter>(){

        @Override
        public String getName() {
            return "StringTransformer";
        }

        @Override
        public String getDocPage() {
            return "string-transform";
        }

        @Override
        public Category getCategory() {
            return Category.TRANSFORMATION;
        }

        @Override
        public Set<ProcessorTag> getTags() {
            return Sets.newHashSet((Object[])new ProcessorTag[]{ProcessorTag.STRING});
        }

        @Override
        public Class<Parameter> stepParamClass() {
            return Parameter.class;
        }

        @Override
        public String getHelp(String language) {
            return this.translate(language, "SHAKER.PROCESSOR.StringTransformer.HELP", "Perform a variety of encoding, decoding, and string transformations on one or several columns. Transformations are always done in-place. For more advanced transformations, use the <a target=\"_blank\" href=\"https://doc.dataiku.com/dss/latest/preparation/processors/formula.html\">Formula processor</a>.\n\n# Options\n\n**Column**\n\nApply transformation to the following: \n\n* A single column\n\n* An explicit list of columns\n\n* All columns matching a regex pattern\n\n* All columns\n\n**Mode**\n\nSelect transformation to apply:\n\n* **Convert to uppercase/lowercase:** convert all text to upper or lower case\n\n* **Encode/decode URL:** form URL escape (*nice 7%* -> *nice%207%25*) or unescape (*nice%207%25* -> *nice 7%*)\n\n* **Escape/unescape XML entities:** replace *'&amp;lt;'*, *'&ampgt;'*, and *'&amp;amp;'* by *<*, *>* and *&* respectively in XML strings\n\n* **Escape/unescape Unicode values:** replace Unicode characters by their codepoint: *\u00e9* -> *\\u00e9* or the opposite\n\n* **Remove leading/trailing whitespace**: trim\n\n* **Capitalize:** put a capital letter at the beginning of each cell\n\n* **Capitalize every word:** put a capital letter at the beginning of each word in the cell\n\n* **Normalize:** convert to lowercase, remove accents, and perform Unicode normalization (*Caf\u00e9* -> *cafe*)\n\n* **Truncate:** keep only the first N characters of the cell\n");
        }

        @Override
        public ProcessorDesc describe(String language) {
            return new ProcessorDesc(this.getName(), this.translate(language, "SHAKER.PROCESSOR.StringTransformer.DESCRIPTION", 1.actionVerb("Transform") + " string"), false).withParam("truncate_limit", "generic", false, true, this.translate(language, "SHAKER.PROCESSOR.StringTransformer.DESCRIPTION.TRUNCATE_LIMIT", "Truncate limit"));
        }

        @Override
        public Object selfReport(Parameter parameter) {
            return AppliesToProcessor.selfReport(parameter);
        }

        @Override
        public ProcessorMeta.ProcessorCapabilitiesSummary getCapabilities(StepParams sp, ProcessorWithRecordedReport.ProcessorRecordedReport report, SQLDialect dialect) {
            return this.getCapabilities(sp, report, dialect, null);
        }

        @Override
        public ProcessorMeta.ProcessorCapabilitiesSummary getCapabilities(StepParams sp, ProcessorWithRecordedReport.ProcessorRecordedReport report, SQLDialect dialect, AbstractSQLConnection conn) {
            Parameter p = (Parameter)sp;
            ProcessorMeta.ProcessorCapabilitiesSummary ret = new ProcessorMeta.ProcessorCapabilitiesSummary();
            ret.withCan(ProcessorCapabilities.NATIVE_SPARK_IMPL);
            if (p.mode == StringTransformation.TO_LOWER || p.mode == StringTransformation.TO_UPPER) {
                ret.withCan(ProcessorCapabilities.SQL_TRANSLATABLE);
            } else if (PrepareSnowflakeUDFUtils.canUseSnowflakeUDF(conn) && p.mode != StringTransformation.TRUNCATE) {
                ret.withCan(ProcessorCapabilities.SQL_TRANSLATABLE);
            } else {
                ret.withCould(ProcessorCapabilities.SQL_TRANSLATABLE, String.format("Cannot use SQL engine: %s mode is not translatable", p.mode));
            }
            return ret;
        }

        @Override
        public StreamImpl build(Parameter parameter) {
            return new StreamImpl(parameter);
        }

        @Override
        public ProcessorSQLTranslator getSQLTranslator(StepParams parameter, ProcessorWithRecordedReport.ProcessorRecordedReport report, AbstractSQLConnection conn) {
            Parameter p = (Parameter)parameter;
            if (p.mode == StringTransformation.TO_LOWER || p.mode == StringTransformation.TO_UPPER) {
                return new SQLTranslator(p);
            }
            if (PrepareSnowflakeUDFUtils.canUseSnowflakeUDF(conn) && p.mode != StringTransformation.TRUNCATE) {
                return new SnowflakeUDFSQLTranslator(p);
            }
            throw new Error("unreachable");
        }

        @Override
        public String getNativeSparkClassname() {
            return "com.dataiku.dip.shaker.processors.transform.StringTransformerNS";
        }

        @Override
        public RecipeLineage getUpdatedRecipeLineage(ProcessorScriptStep pss, RecipeLineage previousRecipeLineage) {
            return previousRecipeLineage;
        }
    };

    /**
     * Streaming implementation: applies the configured string transformation,
     * in place, to each non-empty cell of the targeted columns.
     */
    private static class StreamImpl
    extends AppliesToProcessor
    implements Processor {
        private final Parameter parameter;
        private final StringTransformationSettings transformationSettings;

        /**
         * @param parameter step parameters; its {@code truncate_limit} is copied
         *                  into the transformation settings
         */
        public StreamImpl(Parameter parameter) {
            StringTransformationSettings settings = new StringTransformationSettings();
            settings.truncate_limit = parameter.truncate_limit;
            this.parameter = parameter;
            this.transformationSettings = settings;
        }

        /** @return the parameters this processor was built with */
        @Override
        public AppliesToProcessor.AppliesToParams getParams() {
            return this.parameter;
        }

        /**
         * Transforms the value of every given column in the row. Null and empty
         * values are left untouched.
         *
         * @param row     row to update in place
         * @param columns columns to transform
         */
        @Override
        public void processRowForColumns(Row row, Iterable<Column> columns) throws Exception {
            for (Column column : columns) {
                String current = row.get(column);
                boolean hasContent = current != null && !current.isEmpty();
                if (hasContent) {
                    row.put(column, this.parameter.mode.transform(current, this.transformationSettings));
                }
            }
        }

        /** No post-processing is needed. */
        public void postProcess() {
        }
    }

    /**
     * SQL translator that delegates the transformation to a Snowflake Java UDF
     * ({@code StringTransformerUDF.process}), registered under a per-instance
     * generated function name.
     */
    private static class SnowflakeUDFSQLTranslator
    implements SnowflakeUDFProcessorTranslator {
        private final Parameter parameter;
        private final String functionName;

        /**
         * @param parameter step parameters; the mode name is passed to the UDF
         */
        private SnowflakeUDFSQLTranslator(Parameter parameter) {
            this.parameter = parameter;
            // Generated suffix gives each translator instance its own function name.
            this.functionName = "stringTransformer_" + SecretKeyGenerator.generate();
        }

        /**
         * @return the standard resource list with the SHADELIB and
         *         COMMONS_LANG_JAR standard resources added
         */
        @Override
        public List<SnowflakeUDFProcessorTranslator.SnowflakeUDFResource> getUDFResources() throws IOException {
            List<SnowflakeUDFProcessorTranslator.SnowflakeUDFResource> resources = SnowflakeUDFProcessorTranslator.createStandardResourceList();
            SnowflakeUDFProcessorTranslator.addStandardResources(resources, SnowflakeUDFProcessorTranslator.StandardResource.SHADELIB, SnowflakeUDFProcessorTranslator.StandardResource.COMMONS_LANG_JAR);
            return resources;
        }

        /**
         * @return a mutable single-element list holding the definition of the
         *         {@code (data STRING, mode STRING) -> STRING} UDF
         */
        @Override
        public List<SnowflakeUDFProcessorTranslator.SnowflakeFunctionDef> getUDFs() {
            SnowflakeUDFProcessorTranslator.SnowflakeFunctionDef def = new SnowflakeUDFProcessorTranslator.SnowflakeFunctionDef(this.functionName, "com.dataiku.dip.shaker.processors.transform.StringTransformerUDF.process", "data STRING, mode STRING", "STRING, STRING", "STRING");
            def.importStandardResources(SnowflakeUDFProcessorTranslator.StandardResource.SHADELIB, SnowflakeUDFProcessorTranslator.StandardResource.COMMONS_LANG_JAR);
            // Pass the element directly: wrapping it in an Object[] would make
            // Guava infer ArrayList<Object>, which does not match the return type.
            return Lists.newArrayList(def);
        }

        /**
         * Replaces each targeted column by a call to the UDF on that column,
         * passing the mode name as a quoted string literal.
         *
         * @param chain query being built
         * @return the updated query; a new subquery when a targeted column was
         *         already created or modified by the current query
         */
        @Override
        public SQLQueryWithSchema translate(SQLQueryWithSchema chain) {
            SQLDialect d = chain.getDialect();
            List<String> affectedColumns = chain.getAppliesToColumns(this.parameter);
            if (chain.isAnyCreatedOrModifiedByCurrentQuery(affectedColumns)) {
                chain = chain.makeSubquery();
            }
            ExpressionBuilder.ExpressionBuilderFactory ebf = new ExpressionBuilder.ExpressionBuilderFactory();
            for (String column : affectedColumns) {
                ExpressionBuilder eb = ebf.expr(String.format("%s(%s, %s)", this.functionName, d.quoteIdentifier(column), d.quoteString(this.parameter.mode.name())));
                chain.replaceSelect(column, eb, column);
                chain.markColumnModified(column);
            }
            return chain;
        }
    }

    /**
     * Plain SQL translator for the two case-conversion modes
     * ({@code TO_LOWER} and {@code TO_UPPER}).
     */
    private static class SQLTranslator
    implements ProcessorSQLTranslator {
        private final Parameter parameter;

        /** @param parameter step parameters providing the mode and target columns */
        private SQLTranslator(Parameter parameter) {
            this.parameter = parameter;
        }

        /**
         * Replaces each targeted column by its lower- or upper-cased expression.
         *
         * @param chain query being built
         * @return the updated query; a new subquery when a targeted column was
         *         already created or modified by the current query
         */
        @Override
        public SQLQueryWithSchema translate(SQLQueryWithSchema chain) {
            List<String> targets = chain.getAppliesToColumns(this.parameter);
            SQLQueryWithSchema query = chain.isAnyCreatedOrModifiedByCurrentQuery(targets)
                    ? chain.makeSubquery()
                    : chain;
            for (String name : targets) {
                ExpressionBuilder converted = this.caseConverted(query, name);
                query.replaceSelect(name, converted, name);
                query.markColumnModified(name);
            }
            return query;
        }

        /**
         * Builds the case-converted expression for one column according to the
         * configured mode; any other mode is treated as unreachable.
         */
        private ExpressionBuilder caseConverted(SQLQueryWithSchema query, String name) {
            switch (this.parameter.mode) {
                case TO_LOWER:
                    return query.col(name).lower();
                case TO_UPPER:
                    return query.col(name).upper();
                default:
                    throw new Error("unreachable");
            }
        }
    }

    /**
     * Step parameters of the string transformer: the transformation mode and,
     * for the TRUNCATE mode, the number of characters to keep.
     */
    public static class Parameter
    extends AppliesToProcessor.AppliesToParams {
        private static final long serialVersionUID = -1L;
        /** Transformation to apply. */
        StringTransformation mode;
        /** Truncation limit; defaults to 0. */
        Integer truncate_limit = 0;

        /**
         * Runs the parent validation, then, in TRUNCATE mode, normalizes
         * {@code truncate_limit} to a non-negative value.
         *
         * @throws IllegalArgumentException as declared by the parent validation
         */
        @Override
        public void validate() throws IllegalArgumentException {
            super.validate();
            if (this.mode == StringTransformation.TRUNCATE) {
                // The field is a boxed Integer and may be null; unboxing null
                // would throw, so fall back to the default of 0.
                int requested = this.truncate_limit == null ? 0 : this.truncate_limit;
                this.truncate_limit = Math.max(0, requested);
            }
        }
    }
}

