/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.shaker.processors.time;

import com.dataiku.dip.ProcessorWithResourceFiles;
import com.dataiku.dip.coremodel.SchemaColumn;
import com.dataiku.dip.datalayer.Column;
import com.dataiku.dip.datalayer.ColumnFactory;
import com.dataiku.dip.datalayer.Processor;
import com.dataiku.dip.datalayer.Row;
import com.dataiku.dip.datalineage.DatasetPairLineage;
import com.dataiku.dip.datalineage.RecipeLineage;
import com.dataiku.dip.datasets.Type;
import com.dataiku.dip.shaker.DateTimeFormatterFix;
import com.dataiku.dip.shaker.ProcessorWithRecordedReport;
import com.dataiku.dip.shaker.model.ProcessorScriptStep;
import com.dataiku.dip.shaker.model.StepParams;
import com.dataiku.dip.shaker.processors.AppliesToProcessor;
import com.dataiku.dip.shaker.processors.Category;
import com.dataiku.dip.shaker.processors.ProcessorCapabilities;
import com.dataiku.dip.shaker.processors.ProcessorMeta;
import com.dataiku.dip.shaker.processors.ProcessorTag;
import com.dataiku.dip.shaker.processors.time.Language;
import com.dataiku.dip.shaker.processors.time.TimezonableProcessor;
import com.dataiku.dip.shaker.server.ProcessorDesc;
import com.dataiku.dip.shaker.services.smartdate.DateFormatGuesser;
import com.dataiku.dip.shaker.sql.ProcessorSQLTranslator;
import com.dataiku.dip.shaker.sql.SQLQueryWithSchema;
import com.dataiku.dip.shaker.types.DatetimeNoTz;
import com.dataiku.dip.sql.SQLCapability;
import com.dataiku.dip.sql.SQLDialect;
import com.dataiku.dip.sql.queries.ExpressionBuilder;
import com.dataiku.dip.sql.queries.QueryUtils;
import com.dataiku.dip.util.ParamDesc;
import com.dataiku.dip.utils.DKULogger;
import com.dataiku.dip.utils.DKULoggerFilter;
import com.dataiku.dip.utils.LimitFilter;
import com.dataiku.dip.utils.Pair;
import com.dataiku.dip.warnings.WarningsContext;
import com.dataiku.dss.shadelib.org.joda.time.DateTimeZone;
import com.dataiku.dss.shadelib.org.joda.time.MutableDateTime;
import com.dataiku.dss.shadelib.org.joda.time.ReadWritableInstant;
import com.dataiku.dss.shadelib.org.joda.time.ReadableInstant;
import com.dataiku.dss.shadelib.org.joda.time.ReadablePartial;
import com.dataiku.dss.shadelib.org.joda.time.format.DateTimeFormatter;
import com.dataiku.dss.shadelib.org.joda.time.format.ISODateTimeFormat;
import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.gson.JsonObject;
import java.io.File;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.lang.StringUtils;

public class DateParser
extends AppliesToProcessor
implements ProcessorWithResourceFiles {
    public static final ProcessorMeta<DateParser, Parameter> META = new AppliesToProcessor.AppliesToProcessorMeta<DateParser, Parameter>(){

        /** Returns the processor name, {@code "DateParser"}. */
        @Override
        public String getName() {
            return "DateParser";
        }

        /** Returns the documentation page identifier for this processor, {@code "date-parser"}. */
        @Override
        public String getDocPage() {
            return "date-parser";
        }

        /** Returns the category this processor is listed under: {@link Category#DATE}. */
        @Override
        public Category getCategory() {
            return Category.DATE;
        }

        /**
         * Returns the tags attached to this processor.
         *
         * @return a new mutable set containing only {@link ProcessorTag#DATE}
         */
        @Override
        public Set<ProcessorTag> getTags() {
            // Pass the element directly: casting the array to Object[] would make
            // newHashSet infer HashSet<Object>, which is not a Set<ProcessorTag>.
            return Sets.newHashSet(ProcessorTag.DATE);
        }

        /** Returns the parameter class used by this processor's steps: {@link Parameter}. */
        @Override
        public Class<Parameter> stepParamClass() {
            return Parameter.class;
        }

        /**
         * Returns the help text for this processor.
         *
         * <p>The text is looked up through {@code translate} with the key
         * {@code SHAKER.PROCESSOR.DateParser.HELP}; the literal below is the default English text
         * passed to that lookup.
         *
         * @param language language passed to {@code translate}
         * @return the help text
         */
        @Override
        public String getHelp(String language) {
            // The "H" specifier is documented as hour in day 0-23 (not 0-24) in the Java date pattern syntax linked below.
            return this.translate(language, "SHAKER.PROCESSOR.DateParser.HELP", "\nParse strings containing dates in any format into the standard ISO 8601 format (*yyyy-MM-ddTHH:mm:ss.SSSZ*) to work with them in DSS. Use Smart Dates to get semi-automatic date parsing with the assistance of DSS. \n\n# Options\n\n**Column**\n\nApply date parsing to the following: \n\n* A single column\n\n* An explicit list of columns\n\n* All columns matching a regex pattern\n\n* All columns\n\n**Output column**\n\nLeave blank to parse data in-place or create a separate output column.\n\n**Input date format(s)**\n\nOpen **Find with Smart Date** to get semi-automatic date parsing with the help of DSS. Otherwise, specify the format of your inputs column(s) using the <a target=\"_blank\" href=\"http://docs.oracle.com/javase/7/docs/api/java/text/SimpleDateFormat.html\">Java syntax for date specifiers</a>. \n\n<u>*Note*</u>\nCommon patterns include y (year), M (month in year), w (week in year), d (day in month), E (day name in week), a (am/pm marker), H (hour in day 0-23), h (hour in am/pm 1-12), m (minute in hour), s (second in minute), S (millisecond), Z (time zone).\n\n**Locale**\n\nTranslate date information in locale format (like \u2018mercredi\u2019 or \u2018janvier\u2019 in French). \n\n**Timezone**\n\nProvide details on the time zone, if needed. Options include using a TZ column, an IP column, or specifying a timezone from the dropdown. UTC is the default. \n\n# Related resources\n\nFor more information on managing dates with Dataiku DSS, please see the <a target=\"_blank\" href=\"https://doc.dataiku.com/dss/latest/preparation/dates.html\">reference documentation</a>. If you prefer a hands-on approach, check out this <a target=\"_blank\" href=\"https://knowledge.dataiku.com/latest/courses/basics/prepare-data/concept-date-handling.html\">brief tutorial</a> on parsing dates with DSS.\n\n");
        }

        @Override
        public ProcessorDesc describe(String language) {
            ProcessorDesc desc = new ProcessorDesc(this.getName(), this.translate(language, "SHAKER.PROCESSOR.DateParser.DESCRIPTION", 1.actionVerb("Parse") + " to standard date format"), false).withParam("outCol", "string", false, true, this.translate(language, "SHAKER.PROCESSORS.DESCRIPTION.OUTPUT_COLUMN_EMPTY_FOR_INPLACE", "Output column (empty for in-place)")).withParam(new ParamDesc("formats", "list").withMandatory(true).withLabel(this.translate(language, "SHAKER.PROCESSOR.DateParser.DESCRIPTION.DATE_FORMATS", "Date format(s)")).withCanBeEmpty(false)).withParam(ParamDesc.advancedSelect("lang", this.translate(language, "SHAKER.PROCESSOR.DateParser.DESCRIPTION.LANG", "Locale"), "", Language.class).withDefaultValue(Language.en_US.getLabel()));
            desc = TimezonableProcessor.appendTimezoneParams(language, desc);
            desc.withParam("outType", "string", false, false, this.translate(language, "SHAKER.PROCESSORS.DESCRIPTION.OUTPUT_COLUMN_TYPE", "Output Type"));
            return desc;
        }

        /**
         * Reports whether this step can run as a native Spark implementation and whether it can be
         * translated to SQL for the given dialect.
         *
         * <p>Spark is reported as available unless the timezone is extracted from an IP. SQL
         * translation is reported as available only with a fixed timezone, a locale other than
         * {@code Language.auto}, a dialect providing a {@code TRY_PARSE} operator, and only when
         * the dialect accepts every valid format.
         *
         * @param params step parameters; must be a {@link Parameter}
         * @param report recorded report (not used here)
         * @param dialect SQL dialect queried for parsing support
         * @return the capabilities summary
         */
        @Override
        public ProcessorMeta.ProcessorCapabilitiesSummary getCapabilities(StepParams params, ProcessorWithRecordedReport.ProcessorRecordedReport report, SQLDialect dialect) {
            Parameter dateParams = (Parameter)params;
            ProcessorMeta.ProcessorCapabilitiesSummary summary = new ProcessorMeta.ProcessorCapabilitiesSummary();
            boolean timezoneFromIp = "extract_from_ip".equals(dateParams.timezone_id);
            boolean timezoneFromColumn = "extract_from_column".equals(dateParams.timezone_id);

            // Native Spark support.
            if (timezoneFromIp) {
                summary.withCould(ProcessorCapabilities.NATIVE_SPARK_IMPL, "Only fixed or column timezone is supported");
            } else {
                summary.withCan(ProcessorCapabilities.NATIVE_SPARK_IMPL);
            }

            // SQL support: each guard below reports the first blocking reason and stops.
            if (timezoneFromIp || timezoneFromColumn) {
                summary.withCould(ProcessorCapabilities.SQL_TRANSLATABLE, "Only fixed timezone is supported");
                return summary;
            }
            if (dateParams.lang == Language.auto) {
                summary.withCould(ProcessorCapabilities.SQL_TRANSLATABLE, "'Translate automatically' locale is not supported");
                return summary;
            }
            if (dialect.getOperator(QueryUtils.OperatorType.TRY_PARSE) == null) {
                summary.withCould(ProcessorCapabilities.SQL_TRANSLATABLE, "Parsing string to date is not available");
                return summary;
            }

            List<String> unsupportedFormats = new ArrayList<>();
            for (String candidate : dateParams.getValidFormats()) {
                SQLCapability capability = dialect.canFormatDate(candidate, true);
                if (!capability.capable) {
                    unsupportedFormats.add(candidate + " (" + capability.reason + ")");
                }
            }
            if (unsupportedFormats.isEmpty()) {
                summary.withCan(ProcessorCapabilities.SQL_TRANSLATABLE);
            } else {
                summary.withCould(ProcessorCapabilities.SQL_TRANSLATABLE, "Cannot handle formats : " + Joiner.on(", ").join(unsupportedFormats));
            }
            return summary;
        }

        /**
         * Builds the self-report for the given parameters: the JSON produced by
         * {@code AppliesToProcessor.selfReport} with the {@code "outCol"} entry removed.
         *
         * @param parameter step parameters to report on
         * @return the JSON object without {@code "outCol"}
         */
        @Override
        public Object selfReport(Parameter parameter) {
            JsonObject out = AppliesToProcessor.selfReport(parameter);
            out.remove("outCol");
            return out;
        }

        /** Creates a new {@link DateParser} for the given parameters. */
        @Override
        public DateParser build(Parameter parameter) {
            return new DateParser(parameter);
        }

        /** Returns the fully qualified class name of the native Spark implementation. */
        @Override
        public String getNativeSparkClassname() {
            return "com.dataiku.dip.shaker.processors.time.DateParserNS";
        }

        /**
         * Creates the SQL translator for this step.
         *
         * @param parameter step parameters; cast to {@link Parameter}
         * @param report recorded report (not used here)
         * @return a new {@code SQLTranslator} bound to the parameters
         */
        @Override
        public ProcessorSQLTranslator getSQLTranslator(StepParams parameter, ProcessorWithRecordedReport.ProcessorRecordedReport report) {
            return new SQLTranslator((Parameter)parameter);
        }

        /**
         * Computes the recipe lineage after this step by updating every dataset-pair lineage of
         * the previous recipe lineage with this step's output column.
         *
         * @param pss the script step; its {@code params} must be a {@link Parameter}
         * @param previousRecipeLineage lineage before this step
         * @return a new lineage holding one updated entry per dataset pair
         * @throws IllegalArgumentException if the step parameters are null or not a {@link Parameter}
         */
        @Override
        public RecipeLineage getUpdatedRecipeLineage(ProcessorScriptStep pss, RecipeLineage previousRecipeLineage) {
            if (!(pss.params instanceof Parameter)) {
                // instanceof is also false for null, so do not call getClass() blindly:
                // that would raise a NullPointerException instead of the intended error.
                String actualType = pss.params == null ? "null" : pss.params.getClass().getSimpleName();
                throw new IllegalArgumentException("Unsupported param type: " + actualType);
            }
            Parameter dateParserParam = (Parameter)pss.params;
            RecipeLineage updatedRecipeLineage = new RecipeLineage();
            previousRecipeLineage.getDatasetPairLineages().forEach((datasetPair, previousDatasetPairLineage) -> {
                DatasetPairLineage updatedDatasetPairLineage = super.getUpdatedDatasetPairLineage(dateParserParam, (DatasetPairLineage)previousDatasetPairLineage, dateParserParam.outCol, AppliesToProcessor.AppliesToProcessorMeta.RelationDirection.TO, false);
                updatedRecipeLineage.setDatasetPairLineage((Pair<String, String>)datasetPair, updatedDatasetPairLineage);
            });
            return updatedRecipeLineage;
        }
    };
    // Step parameters received by the constructor.
    private Parameter params;
    // One entry per valid input format; populated in init().
    private List<FormatAndPattern> dateFormats;
    // Separate output column; set in init() only when outCol is non-blank and the step applies
    // to a single column, otherwise left null (results are then written in place).
    private Column outCD;
    // ISO date-time formatter in UTC, used to print parsed values.
    DateTimeFormatter isoFormatter = ISODateTimeFormat.dateTime().withZone(DateTimeZone.UTC);
    // Helper used by this class to resolve the timezone of a row and to handle resource files.
    // Its row-processing lifecycle methods are intentionally empty.
    private TimezonableProcessor timezoneHelper = new TimezonableProcessor(){

        public void init() throws Exception {
        }

        public void processRow(Row row) throws Exception {
        }

        public void postProcess() throws Exception {
        }

        // Lets the enclosing processor hand over its column factory.
        public void setColumnFactory(ColumnFactory cf) {
            this.cf = cf;
        }
    };
    // Logger passed along with bad-date warnings; a LimitFilter built with limit 5 is attached
    // (NOTE(review): presumably caps repeated messages — confirm against LimitFilter).
    private static DKULogger logger = DKULogger.getLogger((String)"dku.shaker.date").addFilter((DKULoggerFilter)LimitFilter.withLimit((int)5));

    /**
     * Creates a date parser for the given step parameters. Formats and columns are only
     * resolved later, in {@link #init()}.
     *
     * @param params step parameters
     */
    public DateParser(Parameter params) {
        this.params = params;
    }

    /**
     * Prepares the processor: resolves the optional output column, builds one formatter per
     * valid input format, and initializes the timezone helper.
     *
     * @throws Exception if the parent initialization, a formatter construction or the timezone
     *         helper initialization fails
     */
    @Override
    public void init() throws Exception {
        super.init();

        // A dedicated output column only exists when the step targets a single column.
        boolean hasOutputName = StringUtils.isNotBlank((String)this.params.outCol);
        if (hasOutputName && this.params.appliesTo == AppliesToProcessor.AppliesTo.SINGLE_COLUMN) {
            ColumnFactory factory = this.getColumnFactory();
            String inputName = (String)this.params.columns.get(0);
            this.outCD = factory.columnAfter(inputName, this.params.outCol, Processor.ProcessorRole.OUTPUT_COLUMN);
        }

        List<FormatAndPattern> compiled = new ArrayList<FormatAndPattern>();
        this.dateFormats = compiled;
        for (String pattern : this.params.getValidFormats()) {
            DateTimeFormatterFix formatter = DateParser.getFormatJoda(pattern, this.params.lang);
            compiled.add(new FormatAndPattern(pattern, formatter));
        }

        this.timezoneHelper.setColumnFactory(this.cf);
        this.timezoneHelper.initTimezonableWithParams(this.params.timezone_id, this.params.timezone_src);
    }

    /**
     * Builds the formatter used to parse values with the given pattern and language.
     *
     * @param format date pattern
     * @param lang language whose locale is applied to the formatter
     * @return the formatter for {@code format}, bound to the locale of {@code lang}
     */
    private static DateTimeFormatterFix getFormatJoda(String format, Language lang) {
        DateTimeFormatterFix patternFormatter = DateTimeFormatterFix.forPattern(format);
        return patternFormatter.withLocale(lang.toLocale());
    }

    /**
     * Creates a {@link SimpleDateFormat} for the given pattern and language, backed by a calendar
     * whose weeks start on Monday and whose first week needs at least four days.
     *
     * @param format date pattern; must not be blank
     * @param lang language whose locale is used by the formatter
     * @return a new formatter configured as described above
     * @throws IllegalArgumentException if {@code format} is blank
     */
    public static SimpleDateFormat getFormat(String format, Language lang) throws IllegalArgumentException {
        if (StringUtils.isBlank(format)) {
            throw new IllegalArgumentException("Date format cannot be empty");
        }
        SimpleDateFormat formatter = new SimpleDateFormat(format, lang.toLocale());
        Calendar weekRules = Calendar.getInstance();
        weekRules.setFirstDayOfWeek(Calendar.MONDAY);
        weekRules.setMinimalDaysInFirstWeek(4);
        formatter.setCalendar(weekRules);
        return formatter;
    }

    /** Forwards the required resource files to the timezone helper. */
    @Override
    public void setRequiredFiles(Map<String, File> requiredFiles) {
        this.timezoneHelper.setRequiredFiles(requiredFiles);
    }

    /** Returns the resource files required by the timezone helper. */
    @Override
    public Map<String, File> gatherRequirements() {
        return this.timezoneHelper.gatherRequirements();
    }

    /**
     * Parses the date held by each given column of the row.
     *
     * <p>For every non-empty cell, the configured formats are tried in order until one parses
     * the value. The result is written to the output column when one is configured, otherwise
     * in place. When no format matches, the output column is deleted from the row (or the cell
     * is set to null for in-place parsing) and one warning per collected error is recorded.
     *
     * @param row row being processed
     * @param columns columns whose values must be parsed
     * @throws Exception if reading/writing the row or preparing the value fails
     */
    @Override
    public void processRowForColumns(Row row, Iterable<Column> columns) throws Exception {
        for (Column cd : columns) {
            String cellValue = row.get(cd);
            if (cellValue == null || cellValue.isEmpty()) {
                continue;
            }

            List<String> errors = new ArrayList<String>();
            String parsed = null;
            for (FormatAndPattern candidate : this.dateFormats) {
                if (parsed != null) {
                    // A previous format already matched.
                    break;
                }
                String format = candidate.pattern;

                // With the automatic language, textual month/day names are translated first;
                // otherwise the value is lower-cased using the configured locale.
                boolean translateNames = this.params.lang.equals(Language.auto) && (format.contains("MMM") || format.contains("EEE"));
                String processedValue;
                if (translateNames) {
                    processedValue = DateFormatGuesser.translateMonthAndDay(cellValue);
                } else {
                    processedValue = cellValue.toLowerCase(this.params.lang.toLocale());
                }

                try {
                    DateTimeZone tz = this.timezoneHelper.getTimezone(row);
                    if (tz == null) {
                        continue;
                    }
                    // Re-bind (and cache) the formatter only when the zone changed.
                    DateTimeFormatterFix dateFormat = candidate.format;
                    if (dateFormat.getZone() != tz) {
                        dateFormat = dateFormat.withZone(tz);
                        candidate.format = dateFormat;
                    }
                    MutableDateTime d = new MutableDateTime(0L, DateTimeZone.UTC);
                    int ret = dateFormat.parseInto((ReadWritableInstant)d, processedValue, 0);
                    if (ret > 0) {
                        if (this.params.outType.getType() == Type.DATEONLY) {
                            // Keep only the yyyy-MM-dd prefix of the ISO representation.
                            parsed = this.isoFormatter.print((ReadableInstant)d).substring(0, 10);
                        } else if (this.params.outType.getType() == Type.DATETIMENOTZ) {
                            parsed = DatetimeNoTz.CANONICAL_FORMATTER.print((ReadablePartial)d.toDateTime().toLocalDateTime());
                        } else {
                            parsed = this.isoFormatter.print((ReadableInstant)d);
                        }
                    } else if (ret == 0) {
                        errors.add(String.format("Failed to parse '%s' using format '%s'", processedValue, format));
                    } else {
                        // A negative result is the bitwise complement of the failure position.
                        int pos = ~ret;
                        errors.add(String.format("Failed to parse '%s' using format '%s', at position %d", processedValue, format, pos));
                    }
                }
                catch (Exception e) {
                    errors.add(String.format("Failed to parse '%s' using format '%s': %s", processedValue, format, e.getMessage()));
                }
            }

            if (parsed != null) {
                if (this.outCD != null) {
                    row.put(this.outCD, parsed);
                } else {
                    row.put(cd, parsed);
                }
                continue;
            }

            // Nothing matched: clear the target and report every collected error.
            if (this.outCD != null) {
                row.delete(this.outCD);
            } else {
                row.put(cd, null);
            }
            for (String err : errors) {
                this.warningsContext.addWarning(WarningsContext.WarningType.SHAKER_BAD_DATE, err, logger);
            }
        }
    }

    /** No post-processing is needed for this processor; intentionally empty. */
    public void postProcess() throws Exception {
    }

    /** Returns the step parameters this processor was created with. */
    @Override
    public Parameter getParams() {
        return this.params;
    }

    /**
     * Parameters of the date parsing step.
     */
    public static class Parameter
    extends AppliesToProcessor.AppliesToParams {
        private static final long serialVersionUID = -1L;
        /** Name of the output column; blank means the value is parsed in place. */
        public String outCol;
        /** Language whose locale is used for parsing; defaults to {@code en_US}. */
        public Language lang = Language.en_US;
        /** Timezone identifier, or a special extraction mode; defaults to {@code "UTC"}. */
        public String timezone_id = "UTC";
        /** Timezone source, passed to the timezone helper together with {@code timezone_id}. */
        public String timezone_src;
        /** Type of the produced column; defaults to {@code Type.DATE}. */
        public SchemaColumn outType = new SchemaColumn("out", Type.DATE);
        /** Input date formats, tried in order; may be null or contain blank entries. */
        public List<String> formats;

        /**
         * Returns the configured formats without the blank entries.
         *
         * @return a new list of non-blank formats, in their original order; empty when
         *         {@code formats} is null
         */
        public List<String> getValidFormats() {
            List<String> nonBlank = new ArrayList<String>();
            if (this.formats != null) {
                for (String candidate : this.formats) {
                    if (StringUtils.isNotBlank(candidate)) {
                        nonBlank.add(candidate);
                    }
                }
            }
            return nonBlank;
        }
    }

    /**
     * Pairs a date pattern string with the formatter built from it.
     */
    private static class FormatAndPattern {
        // Pattern text, used in error messages and to detect textual month/day specifiers.
        private String pattern;
        // Formatter for the pattern; replaced during row processing when the timezone changes.
        private DateTimeFormatterFix format;

        FormatAndPattern(String pattern, DateTimeFormatterFix format) {
            this.pattern = pattern;
            this.format = format;
        }
    }

    /**
     * Translates the date parsing step into SQL expressions.
     */
    private static class SQLTranslator
    implements ProcessorSQLTranslator {
        private final Parameter parameter;
        private final ExpressionBuilder.ExpressionBuilderFactory ebf = new ExpressionBuilder.ExpressionBuilderFactory();

        private SQLTranslator(Parameter parameter) {
            this.parameter = parameter;
        }

        /**
         * Adds, for every affected column, an expression that tries each valid format in order
         * (coalesced when there are several) and stores the result in the output column or in
         * place.
         *
         * @param translateParams translation context holding the input query
         * @return the input query (possibly wrapped in a subquery) with the parsing expressions
         *         added; the untouched input when there is no valid format
         * @throws IllegalArgumentException if the dialect cannot handle one of the formats or has
         *         no {@code TRY_PARSE} operator
         */
        @Override
        public SQLQueryWithSchema translate(ProcessorSQLTranslator.ProcessorSQLTranslateParams translateParams) {
            SQLQueryWithSchema input = translateParams.input;
            // Use the filtered list: it tolerates a null 'formats' and ignores blank entries,
            // which would otherwise lead to a coalesce over zero expressions below.
            List<String> validFormats = this.parameter.getValidFormats();
            if (validFormats.isEmpty()) {
                return input;
            }
            // Resolve the effective output column locally instead of overwriting the shared
            // step parameters: the output column is ignored unless a single, different column
            // is targeted.
            String outCol = this.parameter.outCol;
            if (this.parameter.appliesTo != AppliesToProcessor.AppliesTo.SINGLE_COLUMN || StringUtils.equals((String)this.parameter.columns.get(0), outCol)) {
                outCol = null;
            }
            List<String> affectedColumns = input.getAppliesToColumns(this.parameter);
            boolean needsSubquery = input.isAnyCreatedOrModifiedByCurrentQuery(affectedColumns);
            if (StringUtils.isNotBlank(outCol)) {
                needsSubquery |= input.isCreatedOrModifiedByCurrentQuery(outCol);
            }
            if (needsSubquery) {
                input = input.makeSubquery();
            }
            Locale locale = this.parameter.lang.toLocale();
            for (String format : validFormats) {
                if (input.getDialect().canFormatDate(format, true).capable) continue;
                throw new IllegalArgumentException("Cannot handle format '" + format + "'");
            }
            if (input.getDialect().getOperator(QueryUtils.OperatorType.TRY_PARSE) == null) {
                throw new IllegalArgumentException("Cannot handle parse dates in " + input.getDialect().getId());
            }
            for (String column : affectedColumns) {
                SchemaColumn inputSchemaColumn = input.getCurrentColumn(column);
                ExpressionBuilder col = input.col(inputSchemaColumn, translateParams.typeSystemVersion, translateParams.isDatasetManaged);
                List<ExpressionBuilder> ebs = new ArrayList<ExpressionBuilder>();
                for (String format : validFormats) {
                    ebs.add(col.tryParse(this.ebf.cst(this.parameter.outType.getType()), this.ebf.cst(format), this.ebf.cst(locale), this.ebf.cst(this.parameter.timezone_id)));
                }
                // A single format needs no coalesce; otherwise the first successful parse wins.
                ExpressionBuilder e = ebs.size() == 1 ? ebs.get(0) : this.ebf.coalesce(ebs.toArray(new ExpressionBuilder[0]));
                input.addAfterOrReplaceColumn(inputSchemaColumn, e, this.parameter.outType.getType(), outCol, false);
            }
            return input;
        }
    }
}

