/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.input.formats;

import com.dataiku.dip.coremodel.FormatParams;
import com.dataiku.dip.datasets.SchemaDetection;
import com.dataiku.dip.exceptions.CodedException;
import com.dataiku.dip.formats.FormatFactory;
import com.dataiku.dip.formats.FormatMeta;
import com.dataiku.dip.input.formats.InputFormatsDetector;
import com.dataiku.dip.input.formats.LineOrientedInputSample;
import com.dataiku.dip.input.formats.RawRegexpFormatExtractor;
import com.dataiku.dip.input.formats.RegexpFieldsBuilderFactory;
import com.dataiku.dip.input.formats.SmartRegexpFormatExtractor;
import com.dataiku.dip.output.OutputFormatter;
import com.dataiku.dip.security.AuthCtx;
import com.dataiku.dip.util.ParamDesc;
import com.dataiku.dip.utils.NotImplementedException;
import com.dataiku.dip.utils.RegexpFieldsMatcher;
import java.util.ArrayList;
import java.util.List;

/**
 * Built-in regular-expression based log formats and their detector.
 *
 * <p>Three formats are exposed as {@link FormatMeta} constants: Apache combined log
 * ({@code "apache_combined"}), Apache combined log embedded in a syslog line
 * ({@code "apache_combined_in_syslog"}) and plain syslog ({@code "syslog"}).
 * {@link #detect(LineOrientedInputSample)} runs each format against a sample of lines and
 * returns the ones that match enough of them.
 */
public class RegexpBasedFormats {
    /** Score attached by {@link #detect} to either Apache format when it matches. */
    private static final int APACHE_SCORE = 800;

    /** Score attached by {@link #detect} to the plain syslog format when it matches. */
    private static final int SYSLOG_SCORE = 600;

    /*
     * The patterns below used to contain the range "A-z". That range covers ASCII 65..122 and
     * therefore also matches '[', '\', ']', '^', '_' and '`'. The letter ranges are now spelled
     * out; '_' is listed explicitly where it was previously accepted by accident, so names
     * containing an underscore keep parsing.
     */

    /** Pattern of the "user" column of an Apache combined line (may be empty). */
    private static final String APACHE_USER_PATTERN = "[-_A-Za-z0-9]*";

    /** Pattern of the "servername" and "instancename" tokens of the Apache-in-syslog prefix. */
    private static final String SYSLOG_NAME_TOKEN_PATTERN = "[-_a-zA-Z0-9]+";

    /**
     * Pattern of a plain syslog line, with one capture group per column of
     * {@link #SYSLOG_COLUMNS}. The day of month accepts a leading space as well as a leading
     * digit, because RFC 3164 timestamps pad days 1-9 with a space ("Jan  5 10:00:00").
     */
    private static final String SYSLOG_LINE_PATTERN =
            "([A-Za-z]{3} [ 0-9][0-9] [0-9]{2}:[0-9]{2}:[0-9]{2}) ([^ ]*) ([^ ]*): (.*)$";

    /** Comma-separated column names handed to the syslog extractor config. */
    private static final String SYSLOG_COLUMNS = "date,host,process,message";

    private static final SmartRegexpFormatExtractor.Config APACHE_COMBINED_CONFIG = newApacheCombinedConfig();

    /** Format descriptor for Apache combined log lines. */
    public static final FormatMeta<SmartRegexpFormatExtractor, SmartRegexpFormatExtractor.Config> APACHE_COMBINED_META =
            new FormatMeta<SmartRegexpFormatExtractor, SmartRegexpFormatExtractor.Config>() {

                @Override
                public String getType() {
                    return "apache_combined";
                }

                @Override
                public Class<? extends FormatParams> paramsClass() {
                    return NullConfig.class;
                }

                @Override
                public SmartRegexpFormatExtractor build(AuthCtx authCtx, String projectKey, FormatParams params) {
                    // The format is fixed: the supplied params are not consulted.
                    return new SmartRegexpFormatExtractor(newMatcher(APACHE_COMBINED_CONFIG));
                }

                @Override
                public OutputFormatter buildFormatter(AuthCtx authCtx, String projectKey, FormatParams params) {
                    // Input-only format.
                    throw new NotImplementedException();
                }

                @Override
                public String getLabel() {
                    return "Apache combined log";
                }

                @Override
                public SchemaDetection.SchemaHandlingType getSchemaHandlingType() {
                    return SchemaDetection.SchemaHandlingType.TEXT_POSITION_BASED_FIXED_COLUMNS;
                }

                @Override
                public ParamDesc[] getParams() {
                    return new ParamDesc[]{FormatFactory.getStandardCompressionMethods()};
                }
            };

    private static final SmartRegexpFormatExtractor.Config APACHE_IN_SYSLOG_CONFIG = newApacheInSyslogConfig();

    /** Format descriptor for Apache combined log lines carried inside syslog lines. */
    public static final FormatMeta<SmartRegexpFormatExtractor, SmartRegexpFormatExtractor.Config> APACHE_IN_SYSLOG_COMBINED_META =
            new FormatMeta<SmartRegexpFormatExtractor, SmartRegexpFormatExtractor.Config>() {

                @Override
                public String getType() {
                    return "apache_combined_in_syslog";
                }

                @Override
                public Class<? extends FormatParams> paramsClass() {
                    return NullConfig.class;
                }

                @Override
                public SmartRegexpFormatExtractor build(AuthCtx authCtx, String projectKey, FormatParams params) {
                    // The format is fixed: the supplied params are not consulted.
                    return new SmartRegexpFormatExtractor(newMatcher(APACHE_IN_SYSLOG_CONFIG));
                }

                @Override
                public OutputFormatter buildFormatter(AuthCtx authCtx, String projectKey, FormatParams params) {
                    // Input-only format.
                    throw new NotImplementedException();
                }

                @Override
                public String getLabel() {
                    return "Apache combined log within syslog";
                }

                @Override
                public SchemaDetection.SchemaHandlingType getSchemaHandlingType() {
                    return SchemaDetection.SchemaHandlingType.TEXT_POSITION_BASED_FIXED_COLUMNS;
                }

                @Override
                public ParamDesc[] getParams() {
                    return new ParamDesc[]{FormatFactory.getStandardCompressionMethods()};
                }
            };

    private static final RawRegexpFormatExtractor.Config SYSLOG_CONFIG =
            new RawRegexpFormatExtractor.Config(SYSLOG_LINE_PATTERN, SYSLOG_COLUMNS);

    /**
     * Format descriptor for plain syslog lines.
     *
     * <p>NOTE(review): the second type argument is {@code SmartRegexpFormatExtractor.Config}
     * although the extractor is built from a {@code RawRegexpFormatExtractor.Config}. The
     * declared type is kept as-is so existing callers keep compiling; confirm whether it is
     * intentional.
     */
    public static final FormatMeta<RawRegexpFormatExtractor, SmartRegexpFormatExtractor.Config> SYSLOG_META =
            new FormatMeta<RawRegexpFormatExtractor, SmartRegexpFormatExtractor.Config>() {

                @Override
                public String getType() {
                    return "syslog";
                }

                @Override
                public Class<? extends FormatParams> paramsClass() {
                    return NullConfig.class;
                }

                @Override
                public RawRegexpFormatExtractor build(AuthCtx authCtx, String projectKey, FormatParams params) throws CodedException {
                    // The format is fixed: the supplied params are not consulted.
                    return new RawRegexpFormatExtractor(SYSLOG_CONFIG);
                }

                @Override
                public OutputFormatter buildFormatter(AuthCtx authCtx, String projectKey, FormatParams params) {
                    // Input-only format.
                    throw new NotImplementedException();
                }

                @Override
                public String getLabel() {
                    return "Syslog";
                }

                @Override
                public SchemaDetection.SchemaHandlingType getSchemaHandlingType() {
                    return SchemaDetection.SchemaHandlingType.TEXT_POSITION_BASED_FIXED_COLUMNS;
                }

                @Override
                public ParamDesc[] getParams() {
                    return new ParamDesc[]{FormatFactory.getStandardCompressionMethods()};
                }
            };

    /**
     * Creates a detector. All format definitions are static and built once at class
     * initialization, so construction does not touch any shared state.
     */
    public RegexpBasedFormats() {
    }

    /**
     * Tests the given sample against every built-in format.
     *
     * @param sample the sample whose {@code lines} are matched against each format
     * @return one entry per matching format, in the order Apache combined, Apache in syslog,
     *         syslog; empty when nothing matches
     */
    public List<InputFormatsDetector.FormatWithMetadata> detect(LineOrientedInputSample sample) {
        List<InputFormatsDetector.FormatWithMetadata> candidates = new ArrayList<InputFormatsDetector.FormatWithMetadata>();
        if (this.isOK(APACHE_COMBINED_CONFIG, sample)) {
            candidates.add(new InputFormatsDetector.FormatWithMetadata(APACHE_COMBINED_META.getType(), APACHE_SCORE));
        }
        if (this.isOK(APACHE_IN_SYSLOG_CONFIG, sample)) {
            candidates.add(new InputFormatsDetector.FormatWithMetadata(APACHE_IN_SYSLOG_COMBINED_META.getType(), APACHE_SCORE));
        }
        if (this.isOK(SYSLOG_CONFIG, sample)) {
            candidates.add(new InputFormatsDetector.FormatWithMetadata(SYSLOG_META.getType(), SYSLOG_SCORE));
        }
        return candidates;
    }

    /**
     * Tells whether enough trimmed sample lines are matched by the step-based configuration.
     *
     * @param config the step-based format definition to try
     * @param sample the sample whose {@code lines} are tested
     * @return {@code true} when the acceptance threshold of {@link #matchesEnough} is reached
     */
    private boolean isOK(SmartRegexpFormatExtractor.Config config, LineOrientedInputSample sample) {
        RegexpFieldsMatcher matcher = newMatcher(config);
        int matched = 0;
        for (String sampleLine : sample.lines) {
            // A null result from exec() is counted as "line not matched".
            if (matcher.exec(sampleLine.trim()) != null) {
                ++matched;
            }
        }
        return matchesEnough(matched, sample.lines.size());
    }

    /**
     * Tells whether enough trimmed sample lines are matched by the raw regular expression.
     *
     * @param config the raw regular-expression format definition to try
     * @param sample the sample whose {@code lines} are tested
     * @return {@code true} when the acceptance threshold of {@link #matchesEnough} is reached;
     *         {@code false} as well when the extractor raises a {@link CodedException}
     */
    private boolean isOK(RawRegexpFormatExtractor.Config config, LineOrientedInputSample sample) {
        try {
            RawRegexpFormatExtractor extractor = new RawRegexpFormatExtractor(config);
            int matched = 0;
            for (String sampleLine : sample.lines) {
                if (extractor.find(sampleLine.trim())) {
                    ++matched;
                }
            }
            return matchesEnough(matched, sample.lines.size());
        }
        catch (CodedException ignored) {
            // Detection is best-effort: an extractor that cannot be built or run simply means
            // this format is not a candidate.
            return false;
        }
    }

    /**
     * Acceptance threshold shared by both {@code isOK} overloads: at least one line matched, and
     * the number of matched lines is at least half the sample size. The half is computed with
     * integer division, so it is rounded down (1 matched line out of 3 is accepted).
     */
    private static boolean matchesEnough(int matched, int total) {
        return matched > 0 && matched >= total / 2;
    }

    /** Builds the fields matcher described by a step-based configuration. */
    private static RegexpFieldsMatcher newMatcher(SmartRegexpFormatExtractor.Config config) {
        return RegexpFieldsBuilderFactory.build(config.autoAddSpace, config.steps);
    }

    /** Creates the step-based definition of an Apache combined log line. */
    private static SmartRegexpFormatExtractor.Config newApacheCombinedConfig() {
        SmartRegexpFormatExtractor.Config config = new SmartRegexpFormatExtractor.Config();
        config.autoAddSpace = true;
        addApacheCombinedSteps(config);
        return config;
    }

    /**
     * Creates the step-based definition of an Apache combined log line preceded by a syslog
     * prefix: three unnamed tokens (letters, digits, colon-separated digits), then the
     * "servername" and "instancename" columns, then the regular Apache combined steps.
     */
    private static SmartRegexpFormatExtractor.Config newApacheInSyslogConfig() {
        SmartRegexpFormatExtractor.Config config = new SmartRegexpFormatExtractor.Config();
        config.autoAddSpace = true;
        config.steps.add(RegexpFieldsBuilderFactory.Step.matchStep("[a-zA-Z]{3}"));
        config.steps.add(RegexpFieldsBuilderFactory.Step.matchStep("[0-9]+"));
        config.steps.add(RegexpFieldsBuilderFactory.Step.matchStep("[0-9]+:[0-9]+:[0-9]+"));
        config.steps.add(RegexpFieldsBuilderFactory.Step.captureStep("servername", SYSLOG_NAME_TOKEN_PATTERN));
        config.steps.add(RegexpFieldsBuilderFactory.Step.captureStep("instancename", SYSLOG_NAME_TOKEN_PATTERN));
        addApacheCombinedSteps(config);
        return config;
    }

    /**
     * Appends the nine steps of an Apache combined log line to {@code config.steps}, in column
     * order: ip, a literal "-", user, apache_time, request, http_code, http_len, referer,
     * user_Agent. Shared by both Apache configurations so the two cannot drift apart.
     */
    private static void addApacheCombinedSteps(SmartRegexpFormatExtractor.Config config) {
        config.steps.add(RegexpFieldsBuilderFactory.Step.predefCaptureStep("ip", "ip"));
        config.steps.add(RegexpFieldsBuilderFactory.Step.matchStep("-"));
        config.steps.add(RegexpFieldsBuilderFactory.Step.captureStep("user", APACHE_USER_PATTERN));
        config.steps.add(RegexpFieldsBuilderFactory.Step.customCaptureStep("apache_time", "\\[([^\\]]*)\\]"));
        config.steps.add(RegexpFieldsBuilderFactory.Step.predefCaptureStep("request", "quotted_no_escape"));
        config.steps.add(RegexpFieldsBuilderFactory.Step.predefCaptureStep("http_code", "integer"));
        config.steps.add(RegexpFieldsBuilderFactory.Step.captureStep("http_len", "[0-9]*|-"));
        config.steps.add(RegexpFieldsBuilderFactory.Step.predefCaptureStep("referer", "quotted_no_escape"));
        config.steps.add(RegexpFieldsBuilderFactory.Step.predefCaptureStep("user_Agent", "quotted_no_escape"));
    }

    /** Empty parameters class returned by {@code paramsClass()} of every format in this file. */
    public static class NullConfig
    implements FormatParams {
    }
}

