/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.input.formats;

import com.dataiku.dip.coremodel.FormatParams;
import com.dataiku.dip.coremodel.InfoMessage;
import com.dataiku.dip.datalayer.Column;
import com.dataiku.dip.datalayer.ColumnFactory;
import com.dataiku.dip.datalayer.ProcessorOutput;
import com.dataiku.dip.datalayer.Row;
import com.dataiku.dip.datalayer.RowFactory;
import com.dataiku.dip.datasets.DatasetCodes;
import com.dataiku.dip.datasets.SchemaDetection;
import com.dataiku.dip.exceptions.CodedException;
import com.dataiku.dip.exceptions.ConfValidators;
import com.dataiku.dip.formats.FormatFactory;
import com.dataiku.dip.formats.FormatMeta;
import com.dataiku.dip.input.formats.ArchiveCapableFormatExtractor;
import com.dataiku.dip.input.formats.RowFactoryWithContextInfo;
import com.dataiku.dip.output.OutputFormatter;
import com.dataiku.dip.plugin.InputStreamWithContextInfo;
import com.dataiku.dip.security.AuthCtx;
import com.dataiku.dip.util.ParamDesc;
import com.dataiku.dip.utils.DKULogger;
import com.dataiku.dip.utils.NotImplementedException;
import com.dataiku.dip.utils.Params;
import com.dataiku.dip.utils.StringUtils;
import com.dataiku.dip.warnings.WarningsContext;
import com.dataiku.dss.shadelib.org.apache.commons.io.input.BOMInputStream;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Format extractor that reads a text stream line by line and applies a
 * regular expression to each line. For every line where the pattern is found,
 * one row is emitted whose columns are filled from the capture groups of the
 * match, in order, using the configured capture names as column names.
 * Lines where the pattern is not found are reported as warnings.
 */
public class RawRegexpFormatExtractor extends ArchiveCapableFormatExtractor {
    /** Format descriptor for the {@code "regexp"} format type. */
    public static final FormatMeta<RawRegexpFormatExtractor, Config> META = new FormatMeta<RawRegexpFormatExtractor, Config>() {

        @Override
        public String getType() {
            return "regexp";
        }

        @Override
        public Class<? extends FormatParams> paramsClass() {
            return Config.class;
        }

        @Override
        public RawRegexpFormatExtractor build(AuthCtx authCtx, String projectKey, FormatParams params) throws CodedException {
            return new RawRegexpFormatExtractor((Config) params);
        }

        /** This format is input-only: no output formatter is available. */
        @Override
        public OutputFormatter buildFormatter(AuthCtx authCtx, String projectKey, FormatParams params) {
            throw new NotImplementedException();
        }

        @Override
        public String getLabel() {
            return "Regular expression";
        }

        @Override
        public SchemaDetection.SchemaHandlingType getSchemaHandlingType() {
            return SchemaDetection.SchemaHandlingType.TEXT_POSITION_BASED_FIXED_COLUMNS;
        }

        @Override
        public ParamDesc[] getParams() {
            return new ParamDesc[]{
                ParamDesc.string("regex", "Pattern"),
                new ParamDesc("captureNames", "stringarray").withLabel("Capture names").withTooltip("Comma-separated list of captured column names"),
                new ParamDesc("charset", "charset").withMandatory(false).withLabel("Charset"),
                FormatFactory.getStandardCompressionMethods()
            };
        }
    };

    private final Config config;
    private final Pattern pattern;
    private final List<String> captureNames;
    private final DKULogger logger = DKULogger.getLogger("dku.input.regexp");

    /**
     * Validates the configuration, splits the comma-separated capture names
     * and compiles the regular expression.
     *
     * @param config format parameters; {@code captureNames}, {@code regex} and
     *               {@code charset} are each passed through
     *               {@code ConfValidators.checkNotBlank}
     * @throws CodedException declared for configuration validation failures
     */
    public RawRegexpFormatExtractor(Config config) throws CodedException {
        ConfValidators.checkNotBlank(config.captureNames, (InfoMessage.MessageCode) DatasetCodes.ERR_DATASET_INVALID_FORMAT_CONFIG, "Capture names");
        // Validate the regex itself (the capture names were previously checked twice,
        // letting a blank/null pattern slip through to Pattern.compile).
        ConfValidators.checkNotBlank(config.regex, (InfoMessage.MessageCode) DatasetCodes.ERR_DATASET_INVALID_FORMAT_CONFIG, "Pattern");
        ConfValidators.checkNotBlank(config.charset, (InfoMessage.MessageCode) DatasetCodes.ERR_DATASET_INVALID_FORMAT_CONFIG, "Charset");
        this.config = config;
        this.captureNames = Params.getFromCSVList(config.captureNames, "", ",");
        this.pattern = Pattern.compile(config.regex);
    }

    /**
     * Tells whether the configured pattern is found anywhere in the given line.
     *
     * @param line the text to search
     * @return {@code true} if {@link Matcher#find()} succeeds on {@code line}
     */
    public boolean find(String line) {
        return this.pattern.matcher(line).find();
    }

    /**
     * Reads the stream line by line, emitting one row per line in which the
     * pattern is found and a warning for every other line.
     *
     * <p>Each line is trimmed before matching. Capture group {@code i + 1} is
     * stored in the column built from the {@code i}-th capture name. If the
     * pattern declares more groups than there are capture names, the extra
     * groups are ignored.
     *
     * @return {@code false} as soon as {@code observer.checkLimit} returns
     *         {@code false}; {@code true} once the end of the stream is reached
     * @throws Exception on I/O or downstream processing errors
     */
    @Override
    protected boolean doExtractStream(InputStreamWithContextInfo isn, ProcessorOutput out, ColumnFactory cf, RowFactory rowFactory, ArchiveCapableFormatExtractor.ArchiveCapableObserver observer) throws Exception {
        InputStream is = isn.getInputStream();
        RowFactoryWithContextInfo rf = new RowFactoryWithContextInfo(rowFactory, isn);

        List<Column> columns = new ArrayList<>(this.captureNames.size());
        for (String columnName : this.captureNames) {
            columns.add(cf.column(columnName));
        }

        // For UTF-8 input, wrap the stream in a BOMInputStream before decoding.
        if (StringUtils.isUtf8(this.config.charset)) {
            is = new BOMInputStream(is);
        }

        try (BufferedReader br = new BufferedReader(new InputStreamReader(is, this.config.charset))) {
            long nlines = 0L;
            // A single Matcher is reused for all lines via reset().
            Matcher m = this.pattern.matcher("");
            // The group count is a property of the pattern, so compute it once.
            // Bounding by columns.size() avoids an IndexOutOfBoundsException when
            // the regex has more capture groups than configured capture names.
            int usableGroups = Math.min(m.groupCount(), columns.size());
            while (true) {
                if (!observer.checkLimit(nlines)) {
                    return false;
                }
                String line = br.readLine();
                if (line == null) {
                    break;
                }
                line = line.trim();
                m.reset(line);
                if (m.find()) {
                    Row r = rf.row();
                    for (int i = 0; i < usableGroups; ++i) {
                        // Group 0 is the whole match; capture groups start at 1.
                        r.put(columns.get(i), m.group(i + 1));
                    }
                    out.emitRow(r);
                } else {
                    this.warnContext.addWarning(WarningsContext.WarningType.INPUT_DATA_LINE_DOES_NOT_PARSE, "Line did not parse: " + line, this.logger);
                }
                // nlines counts every line read, matching or not; notify every 500 lines.
                if (++nlines % 500L == 0L) {
                    observer.onInterval(nlines);
                }
            }
            observer.onEnd(nlines);
        }
        return true;
    }

    /** Parameters of the regular-expression format. */
    public static class Config
    implements FormatParams {
        /** Name of the charset used to decode the input stream. */
        public String charset;
        /** Regular expression applied to each line. */
        public String regex;
        /** Comma-separated list of column names, one per capture group. */
        public String captureNames;

        public Config() {
        }

        /**
         * Creates a configuration with the given pattern and capture names.
         * Note that {@code charset} is left unset here, while the extractor
         * constructor applies a not-blank check to it, so callers are expected
         * to assign {@code charset} before building an extractor.
         *
         * @param regex        regular expression applied to each line
         * @param captureNames comma-separated column names
         */
        public Config(String regex, String captureNames) {
            this.regex = regex;
            this.captureNames = captureNames;
        }
    }
}

