/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.input.formats.csv;

import com.dataiku.dip.DKUApp;
import com.dataiku.dip.input.formats.InputFormatsDetector;
import com.dataiku.dip.input.formats.LineOrientedFormatDetector;
import com.dataiku.dip.input.formats.LineOrientedInputSample;
import com.dataiku.dip.input.formats.csv.CSVFormatConfig;
import com.dataiku.dip.input.formats.vendor.opencsv.CSVReader;
import com.dataiku.dip.utils.NumArrays;
import com.google.common.collect.Lists;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;

/**
 * Line-oriented format detector that guesses whether a text sample is CSV and,
 * if so, which separator, style and header settings to use.
 *
 * <p>Detection parses the sample once per candidate separator, keeps the
 * separator for which the largest number of rows share the same column count,
 * then scans the first rows to decide how many to skip and whether the first
 * matching row looks like a header.
 */
public class CSVInputFormatDetector
extends LineOrientedFormatDetector {
    private static final Logger logger = Logger.getLogger("dku.formats.detect");

    /** Format identifier reported by, and recognized by, this detector. */
    private static final String FORMAT_ID = "csv";

    /**
     * Candidate separators, tried in this order. Order matters: the sort used to
     * pick the winner is stable, so on a tie the candidate listed later wins.
     */
    private static final char[] TESTED_SEPARATORS = new char[]{'\u0001', '?', ';', ' ', ':', ',', '|', '\t', '#'};

    /** Maximum number of leading rows examined when looking for the header row. */
    private static final int MAX_HEADER_SCAN_ROWS = 20;

    /**
     * Returns an empty, mutable metadata map when the auto-detected format is
     * {@code "csv"}, and {@code null} otherwise (including for a {@code null} id).
     *
     * @param autoDetectedFormatId identifier of the format that was auto-detected
     * @param sample unused by this implementation
     * @param filename unused by this implementation
     * @return a new empty map for CSV, {@code null} for any other format
     */
    @Override
    public Map<String, String> recomputeMetadata(String autoDetectedFormatId, LineOrientedInputSample sample, String filename) {
        // Constant-first comparison so a null id yields null instead of throwing.
        return FORMAT_ID.equals(autoDetectedFormatId) ? new HashMap<String, String>() : null;
    }

    /**
     * Attempts to detect the sample as CSV.
     *
     * @param sample the sample to analyze
     * @param filename unused by this implementation
     * @return a single-element list holding the CSV candidate when a separator
     *         giving more than one column was found; an empty list when the sample
     *         had huge lines or no such separator exists
     * @throws Exception if the underlying CSV parsing fails
     */
    @Override
    public List<InputFormatsDetector.FormatWithMetadata> detect(LineOrientedInputSample sample, String filename) throws Exception {
        if (sample.hadHugeLines) {
            return new ArrayList<>();
        }
        CSVFormatConfig csvConf = CSVInputFormatDetector.detectConf(sample);
        if (csvConf.probableNumberOfRecords < 1) {
            return new ArrayList<>();
        }
        InputFormatsDetector.FormatWithMetadata fwm = new InputFormatsDetector.FormatWithMetadata();
        fwm.type = FORMAT_ID;
        fwm.params = csvConf;
        // A space separator gets a much lower score than any other separator.
        fwm.detectionScore = csvConf.getSeparatorChar() == ' ' ? 10.0 : 600.0;
        return Lists.newArrayList(fwm);
    }

    /**
     * Builds a CSV configuration guessed from the sample content.
     *
     * <p>Sets the charset, style, quote and escape strings, the best-scoring
     * separator, the detected column count (stored in
     * {@code probableNumberOfRecords}), the number of rows to skip before the
     * header, and whether the first matching row should be parsed as a header.
     *
     * @param sample the sample to analyze
     * @return the detected configuration; {@code probableNumberOfRecords} is 0 when
     *         no tested separator produced more than one column
     * @throws Exception if the underlying CSV parsing fails
     */
    public static CSVFormatConfig detectConf(LineOrientedInputSample sample) throws Exception {
        CSVFormatConfig detectedConf = new CSVFormatConfig();
        detectedConf.setMaxRowChars(DKUApp.getParams().getIntParam("dku.input.formats.csv.maxRowChars", Integer.valueOf(100000000)));
        detectedConf.charset = sample.usedCharset;
        String sampleContent = sample.allLines.toString();

        // A backslash immediately followed by a newline or tab selects the
        // escape-only style; everything else is treated as Excel style.
        boolean hasEscapedBreak = sampleContent.contains("\\\n") || sampleContent.contains("\\\t");
        detectedConf.style = hasEscapedBreak ? CSVFormatConfig.CSVStyle.ESCAPE_ONLY_NO_QUOTE : CSVFormatConfig.CSVStyle.EXCEL;
        detectedConf.setQuoteStr("\"");
        detectedConf.setEscapeStr("\\");

        int maxRecords = sample.lines.size();
        CSVMatch[] matches = new CSVMatch[TESTED_SEPARATORS.length];
        for (int ts = 0; ts < TESTED_SEPARATORS.length; ++ts) {
            matches[ts] = scoreSeparator(sampleContent, maxRecords, TESTED_SEPARATORS[ts], ts);
        }
        // Ascending sort: the best candidate ends up in the last slot.
        Arrays.sort(matches);
        CSVMatch best = matches[matches.length - 1];
        logger.info((Object)("Best separator is " + best.idx));
        detectedConf.setSeparatorStr(String.valueOf(best.sep));
        detectedConf.probableNumberOfRecords = best.bestNumberOfColumns;

        try (CSVReader reader = new CSVReader(new StringReader(sampleContent), detectedConf.getSeparatorChar())) {
            for (int i = 0; i < MAX_HEADER_SCAN_ROWS; ++i) {
                String[] chunks = reader.readNext();
                if (chunks == null) {
                    break;
                }
                boolean isAllEmptyCells = Arrays.stream(chunks).noneMatch(StringUtils::isNotBlank);
                if (chunks.length != detectedConf.probableNumberOfRecords || isAllEmptyCells) {
                    // Row does not have the detected shape, or is blank: skip it.
                    ++detectedConf.skipRowsBeforeHeader;
                    continue;
                }
                // First row with the detected column count decides the header flag.
                detectedConf.parseHeaderRow = looksLikeHeader(chunks);
                break;
            }
        }
        return detectedConf;
    }

    /**
     * Parses the sample with one candidate separator and records the column count
     * shared by the most rows.
     *
     * @param content full text of the sample
     * @param maxRecords maximum number of records to read
     * @param separator candidate separator character
     * @param index position of the candidate in {@link #TESTED_SEPARATORS}
     * @return the match; its column and line counts stay 0 when the dominant
     *         column count is not greater than 1
     * @throws Exception if the underlying CSV parsing fails
     */
    private static CSVMatch scoreSeparator(String content, int maxRecords, char separator, int index) throws Exception {
        try (CSVReader reader = new CSVReader(new StringReader(content), separator)) {
            // NOTE(review): when the reader yields fewer records than maxRecords the
            // remaining slots stay 0 and are included in the analysis below — confirm
            // this is intended.
            int[] lineLengths = new int[maxRecords];
            String[] chunks;
            for (int i = 0; i < maxRecords && (chunks = reader.readNext()) != null; ++i) {
                lineLengths[i] = chunks.length;
            }
            // Presumably {value, count} pairs ordered so the last entry is the most
            // frequent value — TODO confirm against NumArrays.
            int[][] analysis = NumArrays.distinctValuesCounts(lineLengths);
            CSVMatch match = new CSVMatch();
            match.sep = separator;
            match.idx = index;
            if (analysis.length > 0) {
                int[] dominant = analysis[analysis.length - 1];
                if (dominant[0] > 1) {
                    match.bestNumberOfColumns = dominant[0];
                    match.biggestNumberOfIdenticalLines = dominant[1];
                }
            }
            return match;
        }
    }

    /**
     * Decides whether a row should be treated as a header: yes when its first cell
     * starts with {@code '#'}, otherwise yes unless some non-blank cell is numeric.
     *
     * @param chunks cells of the row; must contain at least one element
     * @return {@code true} if the row looks like a header
     */
    private static boolean looksLikeHeader(String[] chunks) {
        // Null guard: a null first cell is simply treated as "not a comment marker".
        if (chunks[0] != null && chunks[0].startsWith("#")) {
            return true;
        }
        for (String chunk : chunks) {
            if (StringUtils.isNotBlank(chunk) && StringUtils.isNumeric(chunk)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Score of one candidate separator. Orders by the number of rows sharing the
     * dominant column count, then by that column count.
     */
    private static class CSVMatch
    implements Comparable<CSVMatch> {
        /** Candidate separator character. */
        char sep;
        /** Position of the candidate in the tested-separators array. */
        int idx;
        /** Number of rows sharing the dominant column count (0 if none qualified). */
        int biggestNumberOfIdenticalLines;
        /** Dominant column count (0 if none qualified). */
        int bestNumberOfColumns;

        private CSVMatch() {
        }

        @Override
        public int compareTo(CSVMatch o) {
            // Integer.compare instead of subtraction: same sign, no overflow risk.
            int byLines = Integer.compare(this.biggestNumberOfIdenticalLines, o.biggestNumberOfIdenticalLines);
            if (byLines != 0) {
                return byLines;
            }
            return Integer.compare(this.bestNumberOfColumns, o.bestNumberOfColumns);
        }
    }
}

