/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.input.formats.hive.sequencefile;

import com.dataiku.dip.coremodel.Schema;
import com.dataiku.dip.input.formats.hive.HiveFormatExtractor;
import com.dataiku.dip.input.formats.hive.sequencefile.LazySimpleSerDeSchemaInferrer;
import com.dataiku.dip.input.formats.hive.sequencefile.SequenceFileFormatConfig;
import java.util.ArrayList;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.SequenceFileInputFormat;

/**
 * Hive format extractor for Hadoop SequenceFiles.
 *
 * <p>Supplies the {@link SequenceFileInputFormat}, the key/value holders used while
 * reading records, and a schema inference routine that samples the first records of
 * a file.
 */
public class SequenceFileFormatExtractor
extends HiveFormatExtractor<SequenceFileFormatConfig> {
    /** Maximum number of records sampled from the file when inferring the schema. */
    private static final int MAX_SAMPLE_ROWS = 100;

    /** The only serde class for which {@link #inferSchema} currently produces a schema. */
    private static final String LAZY_SIMPLE_SERDE_CLASS = "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe";

    /**
     * Creates an extractor bound to the given configuration.
     *
     * @param config the sequence file format configuration, passed to the superclass
     */
    public SequenceFileFormatExtractor(SequenceFileFormatConfig config) {
        super(config);
    }

    /**
     * @return a new {@link SequenceFileInputFormat} instance
     */
    @Override
    public InputFormat<Writable, Writable> createInputFormat() {
        return new SequenceFileInputFormat<Writable, Writable>();
    }

    /**
     * Returns the holder object used for record keys.
     *
     * @param serde        unused by this implementation
     * @param recordReader unused by this implementation
     * @return a new, empty {@link BytesWritable}
     */
    @Override
    public Writable getKey(Object serde, RecordReader<Writable, Writable> recordReader) {
        return new BytesWritable();
    }

    /**
     * Returns the holder object used for record values, instantiated from the class
     * that {@code serdeAccessUtils.getSerializedClass(serde)} reports for the serde.
     *
     * @param serde        the serde whose serialized class is instantiated
     * @param recordReader unused by this implementation
     * @return a new instance of the serde's serialized class
     * @throws RuntimeException if the class cannot be resolved or instantiated
     */
    @Override
    public Writable getValue(Object serde, RecordReader<Writable, Writable> recordReader) {
        try {
            return this.serdeAccessUtils.getSerializedClass(serde).getDeclaredConstructor().newInstance();
        }
        catch (Exception e) {
            throw new RuntimeException("Unable to instanciate value class", e);
        }
    }

    /**
     * Infers a schema by sampling up to {@link #MAX_SAMPLE_ROWS} records from the
     * sequence file at {@code hdfsPath}.
     *
     * <p>The configured serde class is always instantiated and the file is always
     * sampled, but a schema is only produced when the configured serde is
     * {@code LazySimpleSerDe}; for any other serde this method returns {@code null}.
     *
     * @param fs       the file system holding the file
     * @param hdfsPath the path of the sequence file to sample
     * @return the schema detected by {@link LazySimpleSerDeSchemaInferrer}, or
     *         {@code null} when the configured serde is not {@code LazySimpleSerDe}
     * @throws Exception if the serde or the key/value classes cannot be instantiated,
     *                   or if reading the file fails
     */
    @Override
    protected Schema inferSchema(FileSystem fs, Path hdfsPath) throws Exception {
        SequenceFileFormatConfig config = (SequenceFileFormatConfig)this.getConfig();
        Object serde = Class.forName(config.getSerdeClass()).getDeclaredConstructor().newInstance();
        Configuration configuration = new Configuration();
        ArrayList<Writable> rows = new ArrayList<Writable>();
        try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, hdfsPath, configuration)) {
            Class<? extends Writable> valueClass = this.serdeAccessUtils.getSerializedClass(serde);
            // The key is never retained, so a single instance can be reused for every record.
            Writable key = (Writable)reader.getKeyClass().getDeclaredConstructor().newInstance();
            while (rows.size() < MAX_SAMPLE_ROWS) {
                // The reader deserializes into the object it is given; a fresh value per
                // record is required, otherwise every entry of the sample would alias the
                // same object and only reflect the last record read.
                Writable value = valueClass.getDeclaredConstructor().newInstance();
                if (!reader.next(key, value)) {
                    break;
                }
                rows.add(value);
            }
        }
        if (LAZY_SIMPLE_SERDE_CLASS.equals(config.getSerdeClass())) {
            LazySimpleSerDeSchemaInferrer inferrer = new LazySimpleSerDeSchemaInferrer(config);
            return inferrer.detect(rows);
        }
        return null;
    }
}

