/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.shaker.mrimpl.formats;

import com.dataiku.dip.coremodel.SchemaColumn;
import com.dataiku.dip.datalayer.Column;
import com.dataiku.dip.datalayer.ColumnFactory;
import com.dataiku.dip.datalayer.Row;
import com.dataiku.dip.datalayer.RowFactory;
import com.dataiku.dip.datasets.StorageTypeVerifier;
import com.dataiku.dip.datasets.Type;
import com.dataiku.dip.input.formats.csv.CSVDeserializer;
import com.dataiku.dip.input.formats.csv.CSVFormatConfig;
import com.dataiku.dip.input.formats.csv.CSVParser;
import com.dataiku.dip.input.formats.csv.EscapingOnlyCSVParser;
import com.dataiku.dip.input.formats.csv.NoEscapeNoQuoteCSVParser;
import com.dataiku.dip.input.formats.csv.RFC4180CSVParser;
import com.dataiku.dip.input.formats.csv.UNIXStyleCSVParser;
import com.dataiku.dip.input.stream.LineReader;
import com.dataiku.dip.shaker.mrimpl.formats.RowWithFactories;
import com.dataiku.dip.shaker.mrimpl.models.DatasetConfig;
import com.dataiku.dip.utils.DKULogger;
import com.dataiku.dip.warnings.WarningsContext;
import com.google.common.base.Preconditions;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

/**
 * Hadoop input format that reads files line by line through a wrapped
 * {@link TextInputFormat} and converts every line into a {@link RowWithFactories}
 * according to the CSV format parameters and schema of the input dataset.
 *
 * <p>Splits are computed by the wrapped {@link TextInputFormat}; records always have
 * a {@code null} key.
 */
public class CSVInputFormatAdapter extends FileInputFormat<Void, RowWithFactories> {
    private static final DKULogger logger = DKULogger.getLogger(CSVInputFormatAdapter.class);

    /** A cell strictly longer than this many characters triggers a "very long" warning. */
    private static final int VERY_LONG_CELL_THRESHOLD = 500000;

    /** Number of leading characters of an oversized cell that are quoted in the warning. */
    private static final int VERY_LONG_CELL_PREVIEW_LENGTH = 500;

    private final TextInputFormat inputFormat;
    private final CSVFormatConfig csvFormatParams;
    private final ColumnFactory columnFactory;
    private final RowFactory rowFactory;
    /** One {@link Column} per schema column, in schema order (same indices as {@link #schemaColumns}). */
    private final List<Column> columns;
    private final WarningsContext warningsContext;
    private final List<SchemaColumn> schemaColumns;
    private final CSVDeserializer deserializer;
    private final StorageTypeVerifier storageTypeVerifier = new StorageTypeVerifier();

    /**
     * Builds the adapter from the configuration of the input dataset.
     *
     * @param cf factory used to obtain one {@link Column} per schema column
     * @param rf factory used to create the rows that are emitted
     * @param inputConf dataset configuration; its format parameters are cast to
     *     {@link CSVFormatConfig}, so a dataset with another format fails with a
     *     {@link ClassCastException}
     * @param warningsContext sink for data-quality warnings, must not be {@code null}
     * @throws NullPointerException if {@code warningsContext} is {@code null}
     */
    public CSVInputFormatAdapter(ColumnFactory cf, RowFactory rf, DatasetConfig inputConf, WarningsContext warningsContext) {
        this.rowFactory = rf;
        this.columnFactory = cf;
        this.inputFormat = new TextInputFormat();
        this.warningsContext = Preconditions.checkNotNull(warningsContext);
        this.csvFormatParams = (CSVFormatConfig)inputConf.dataset.getFormatParams();
        this.schemaColumns = inputConf.dataset.getSchema().getColumns();
        this.deserializer = new CSVDeserializer(this.csvFormatParams, warningsContext, null);
        this.columns = new ArrayList<Column>();
        for (SchemaColumn sc : this.schemaColumns) {
            this.columns.add(this.columnFactory.column(sc.getName()));
        }
    }

    /**
     * Delegates split computation to the wrapped {@link TextInputFormat}.
     *
     * @param job the job context
     * @return the splits computed by the wrapped format
     * @throws IOException if the wrapped format fails to compute the splits
     */
    public List<InputSplit> getSplits(JobContext job) throws IOException {
        return this.inputFormat.getSplits(job);
    }

    /**
     * Creates a record reader that wraps the line-oriented reader of the underlying
     * {@link TextInputFormat} and parses each line as CSV.
     *
     * @param inputSplit the split to read
     * @param taskAttemptContext the task context
     * @return a reader producing one {@link RowWithFactories} per parsed line
     * @throws RuntimeException if the configured CSV style is not supported
     */
    public RecordReader<Void, RowWithFactories> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
        RecordReader<LongWritable, Text> lineRecordReader = this.inputFormat.createRecordReader(inputSplit, taskAttemptContext);
        return new InternalRecordReader(lineRecordReader);
    }

    /**
     * Record reader that pulls lines from a text record reader and parses them as CSV.
     *
     * <p>It also acts as the {@link LineReader} handed to the non-UNIX parsers:
     * {@link #readLine()} hands out the line fetched by the latest
     * {@link #nextKeyValue()} call exactly once.
     */
    private class InternalRecordReader
    extends RecordReader<Void, RowWithFactories>
    implements LineReader {
        private final RecordReader<LongWritable, Text> realRecordReader;
        /** Reused across records; cleared before each line is parsed. */
        private final List<String> cellBuffer = new ArrayList<String>();
        /** Shared parser for all styles except UNIX, for which it is {@code null}. */
        private final CSVParser parser;
        private String currentLine;
        private boolean endReached;
        private RowWithFactories currentRow;

        /**
         * @param realRecordReader line-oriented reader providing (byte offset, line) pairs
         * @throws RuntimeException if the configured CSV style is not one of the handled styles
         */
        public InternalRecordReader(RecordReader<LongWritable, Text> realRecordReader) {
            this.realRecordReader = realRecordReader;
            switch (csvFormatParams.style) {
                case ESCAPE_ONLY_NO_QUOTE: {
                    this.parser = new EscapingOnlyCSVParser(this, csvFormatParams.getSeparatorChar(), csvFormatParams.getEscapeChar());
                    break;
                }
                case EXCEL: {
                    this.parser = new RFC4180CSVParser((LineReader)this, csvFormatParams.getSeparatorChar());
                    break;
                }
                case NO_ESCAPE_NO_QUOTE: {
                    this.parser = new NoEscapeNoQuoteCSVParser(this, csvFormatParams.getSeparatorChar());
                    break;
                }
                case UNIX: {
                    // The UNIX-style parser is built per line in nextKeyValue().
                    this.parser = null;
                    break;
                }
                default: {
                    throw new RuntimeException("Unsupported CSV style: " + csvFormatParams.style);
                }
            }
        }

        /** Initializes the wrapped line reader on the given split. */
        public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
            this.realRecordReader.initialize(inputSplit, taskAttemptContext);
        }

        /**
         * Reads the next line, parses it and builds the current row.
         *
         * <p>When the line just read starts at byte offset 0 and the format is configured
         * with {@code parseHeaderRow}, that line is discarded and the following one is used.
         * Only the first {@code min(schema size, parsed cell count)} columns are populated.
         * Once the underlying reader is exhausted or the parser yields no record, the
         * reader is marked as finished and every later call returns {@code false}.
         *
         * @return {@code true} if a row is available through {@link #getCurrentValue()}
         */
        public boolean nextKeyValue() throws IOException, InterruptedException {
            this.currentLine = null;
            this.currentRow = null;
            if (this.endReached) {
                return false;
            }
            this.cellBuffer.clear();
            if (!this.realRecordReader.nextKeyValue()) {
                this.endReached = true;
                return false;
            }

            long currentOffset = this.realRecordReader.getCurrentKey().get();
            // Offset 0 means this is the first line of the file: skip it if it is a header.
            if (currentOffset == 0L && csvFormatParams.parseHeaderRow && !this.realRecordReader.nextKeyValue()) {
                this.endReached = true;
                return false;
            }
            this.currentLine = this.realRecordReader.getCurrentValue().toString();
            currentOffset = this.realRecordReader.getCurrentKey().get();

            CSVParser currentParser = this.parser;
            if (csvFormatParams.style == CSVFormatConfig.CSVStyle.UNIX) {
                // UNIX style reads from a Reader, so a fresh parser is created over the current line.
                currentParser = new UNIXStyleCSVParser(new StringReader(this.currentLine), csvFormatParams.getSeparatorChar(), csvFormatParams.getQuoteChar(), Character.valueOf(csvFormatParams.getEscapeChar()));
            }
            assert (currentParser != null) : "The parser cannot be null";
            if (!currentParser.next(this.cellBuffer)) {
                this.endReached = true;
                return false;
            }

            this.sanityCheck(currentOffset, this.cellBuffer);
            Row row = rowFactory.row();
            // Columns beyond the number of parsed cells are left unset.
            int populatedColumns = Math.min(columns.size(), this.cellBuffer.size());
            for (int i = 0; i < populatedColumns; ++i) {
                SchemaColumn sc = schemaColumns.get(i);
                String value = this.cellBuffer.get(i);
                if (!sc.getType().isPrimitive()) {
                    value = deserializer.parseComplex(value, sc);
                } else if (sc.getType() == Type.DATE) {
                    value = deserializer.parseDate(value, sc);
                } else if (sc.getType() == Type.BOOLEAN) {
                    value = deserializer.parseBoolean(value, sc);
                } else if (sc.getType() == Type.FLOAT || sc.getType() == Type.DOUBLE) {
                    value = deserializer.parseDouble(value, sc);
                }
                // Other primitive types are passed to the verifier without prior conversion.
                value = storageTypeVerifier.verify(value, sc, csvFormatParams.readDataTypeMismatchBehavior, warningsContext, null);
                row.put(columns.get(i), value);
            }
            this.currentRow = new RowWithFactories(columnFactory, rowFactory, row);
            return true;
        }

        /**
         * Emits warnings for a parsed line whose cell count differs from the schema size
         * (only when the schema is non-empty) and for every cell longer than
         * {@link #VERY_LONG_CELL_THRESHOLD} characters.
         *
         * @param offset byte offset of the line; currently not used
         * @param line the parsed cells of the line
         */
        private void sanityCheck(long offset, List<String> line) {
            if (!columns.isEmpty() && line.size() != columns.size()) {
                warningsContext.addWarning(WarningsContext.WarningType.INPUT_DATA_BAD_NBCOLS, "Line has an unexpected number of columns, line has " + line.size() + " columns, extractor has " + columns.size(), logger);
            }
            for (String cell : line) {
                if (cell.length() <= VERY_LONG_CELL_THRESHOLD) {
                    continue;
                }
                // The cell is longer than the threshold, so the preview substring is always in range.
                warningsContext.addWarning(WarningsContext.WarningType.INPUT_DATA_VERY_LONG, "Unusually large column (quoting issue ?) : >>>>>>" + cell.substring(0, VERY_LONG_CELL_PREVIEW_LENGTH) + "<<<<<<", logger);
            }
        }

        /** @return always {@code null}; records carry no key */
        public Void getCurrentKey() throws IOException, InterruptedException {
            return null;
        }

        /** @return the row built by the latest successful {@link #nextKeyValue()}, or {@code null} */
        public RowWithFactories getCurrentValue() throws IOException, InterruptedException {
            return this.currentRow;
        }

        /** @return the progress reported by the wrapped line reader */
        public float getProgress() throws IOException, InterruptedException {
            return this.realRecordReader.getProgress();
        }

        /** Closes the wrapped line reader. */
        public void close() throws IOException {
            this.realRecordReader.close();
        }

        /**
         * Hands out the current line once: the first call after a line was fetched
         * returns it, later calls return {@code null} until the next line is fetched.
         *
         * @return the pending line, or {@code null} if it was already consumed
         */
        public String readLine() throws IOException {
            String pendingLine = this.currentLine;
            this.currentLine = null;
            return pendingLine;
        }
    }
}

