/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.input.formats.hive;

import com.dataiku.dip.coremodel.Schema;
import com.dataiku.dip.coremodel.SchemaColumn;
import com.dataiku.dip.datalayer.ColumnFactory;
import com.dataiku.dip.datalayer.ProcessorOutput;
import com.dataiku.dip.datalayer.Row;
import com.dataiku.dip.datalayer.RowFactory;
import com.dataiku.dip.datasets.fs.HDFSProvider;
import com.dataiku.dip.futures.FutureAborter;
import com.dataiku.dip.input.InputSplitProgressListener;
import com.dataiku.dip.input.formats.ExtractionLimit;
import com.dataiku.dip.input.formats.FormatExtractor;
import com.dataiku.dip.input.formats.hive.DummyReporter;
import com.dataiku.dip.input.formats.hive.HiveFileFormatConfig;
import com.dataiku.dip.input.formats.hive.SerdeAccessUtils;
import com.dataiku.dip.input.formats.hive.serde.DSSRowConverter;
import com.dataiku.dip.input.formats.hive.serde.ObjectInspectorBuilder;
import com.dataiku.dip.input.stream.EnrichedInputStream;
import com.dataiku.dip.input.stream.StreamsInputSplit;
import com.dataiku.dip.util.HadoopUtils;
import com.dataiku.dip.utils.DKULogger;
import com.dataiku.dip.utils.ErrorContext;
import com.dataiku.dip.utils.JSON;
import com.dataiku.dip.warnings.WarningsContext;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.collect.FluentIterable;
import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.security.UserGroupInformation;

/**
 * Base {@link FormatExtractor} that reads HDFS-backed streams through a Hadoop
 * {@code mapred} {@link InputFormat} and a Hive SerDe, and emits one DSS {@link Row}
 * per record.
 *
 * <p>Subclasses supply the input format, the key/value holders handed to the record
 * reader, and (optionally) schema inference. The SerDe named by
 * {@link HiveFileFormatConfig#getSerdeClass()} is instantiated reflectively and
 * initialized once, on the first stream processed.
 *
 * @param <T> concrete format configuration type
 */
public abstract class HiveFormatExtractor<T extends HiveFileFormatConfig>
implements FormatExtractor {
    protected static final DKULogger logger = DKULogger.getLogger("dku.formats.hive");

    /** Value of {@link #limit} meaning "no record limit". */
    private static final long NO_LIMIT = -1L;

    private final T config;
    private final String serdeClass;
    /** Set to {@code true} once {@link #initSerdeDeserializer} has completed successfully. */
    protected boolean serdeInitialized = false;
    private long limit = NO_LIMIT;
    private Schema schema;
    private Object serde;
    protected final SerdeAccessUtils serdeAccessUtils;
    private InputSplitProgressListener listener = new InputSplitProgressListener();
    private DSSRowConverter rowConverter;
    private StructObjectInspector rowObjectInspector;
    /** Rows emitted by this extractor across every {@link #run} invocation. */
    final AtomicLong totalRecords = new AtomicLong();

    /**
     * Creates an extractor for the given format configuration.
     *
     * @param config format configuration; its SerDe class name is read immediately
     */
    public HiveFormatExtractor(T config) {
        this.serdeClass = config.getSerdeClass();
        this.config = config;
        this.serdeAccessUtils = new SerdeAccessUtils();
    }

    /**
     * Sets the maximum number of records to emit.
     *
     * @param limit extraction limit, or {@code null} to remove the limit
     */
    @Override
    public void setLimit(ExtractionLimit limit) {
        this.limit = limit == null ? NO_LIMIT : limit.maxRecords;
    }

    /**
     * @return always {@code false}
     */
    @Override
    public boolean canSetSchemaForExtractor() {
        return false;
    }

    /**
     * Stores the schema used to configure the SerDe and the row converter.
     *
     * @param schema schema to use; when left {@code null}, {@link #inferSchema} is tried
     *        during SerDe initialization
     * @param allowExtraColumns ignored by this implementation
     */
    @Override
    public void setSchema(Schema schema, boolean allowExtraColumns) {
        this.schema = schema;
    }

    /**
     * Replaces the listener notified with the running record count after each emitted row.
     *
     * @param listener listener to notify
     */
    @Override
    public void setProgressListener(InputSplitProgressListener listener) {
        this.listener = listener;
    }

    /**
     * No-op: this extractor does not report warnings.
     *
     * @param warnContext ignored
     */
    @Override
    public void setWarningsContext(WarningsContext warnContext) {
    }

    /**
     * Extracts every stream of the given DSS split, emitting rows to {@code out}.
     *
     * <p>Each stream is processed inside {@link HadoopUtils#fixedUpDoAs} using the
     * stream's own {@link UserGroupInformation}.
     *
     * @param in split whose streams are consumed in order
     * @param out receiver of the extracted rows
     * @param cf column factory handed to the row converter
     * @param rf row factory handed to the row converter
     * @return {@code true} when all streams were fully read, {@code false} when
     *         extraction stopped because the record limit was reached
     * @throws Exception if a stream is not HDFS-backed, or on any read/SerDe failure
     */
    @Override
    public boolean run(StreamsInputSplit in, final ProcessorOutput out, final ColumnFactory cf, final RowFactory rf) throws Exception {
        final AtomicLong dssSplitRecords = new AtomicLong(0L);
        if (this.limit != NO_LIMIT) {
            logger.info("Run format extractor with limit=" + this.limit + " totalRecordBefores=" + this.totalRecords.get());
        } else {
            logger.info("Run format extractor without limit");
        }
        while (true) {
            // Per-iteration finals so the anonymous action below can capture them.
            final EnrichedInputStream eis = in.nextStream();
            if (eis == null) {
                logger.infoV("Done processing DSS split, dssSplitRecords=%d totalRecords=%d", new Object[]{dssSplitRecords.get(), this.totalRecords.get()});
                return true;
            }
            if (!(eis instanceof HDFSProvider.HDFSInputStream)) {
                throw ErrorContext.iae("This format not supported on non-HDFS datasets");
            }
            HDFSProvider.HDFSInputStream hdfsEis = (HDFSProvider.HDFSInputStream)eis;
            final JobConf jobConf = hdfsEis.setupHadoopJobConf(false);
            final Path hdfsPath = hdfsEis.getFSPath();
            final FileSystem fs = hdfsEis.getFS();
            Boolean shouldContinue = HadoopUtils.fixedUpDoAs(hdfsEis.getUGI(), new PrivilegedExceptionAction<Boolean>() {
                @Override
                public Boolean run() throws Exception {
                    return HiveFormatExtractor.this.extractStream(eis, jobConf, fs, hdfsPath, out, cf, rf, dssSplitRecords);
                }
            });
            if (!shouldContinue.booleanValue()) {
                return false;
            }
        }
    }

    /**
     * Reads every Hadoop split of one stream and emits its records as rows.
     *
     * @return {@code false} if the record limit was reached, {@code true} otherwise
     */
    private boolean extractStream(final EnrichedInputStream eis, JobConf jobConf, FileSystem fs, Path hdfsPath,
            ProcessorOutput out, ColumnFactory cf, RowFactory rf, AtomicLong dssSplitRecords) throws Exception {
        this.initSerdeDeserializer(cf, rf, fs, hdfsPath);
        logger.info("Process input file:" + hdfsPath);
        FileInputFormat.setInputPaths(jobConf, hdfsPath);
        InputFormat<Writable, Writable> format = this.createInputFormat();
        // Index of the next emitted record within this file; stored as the row's sourceRecord.
        long fileRecords = 0L;
        for (InputSplit split : format.getSplits(jobConf, 1)) {
            logger.info("Processing Hadoop split");
            final RecordReader<Writable, Writable> recordReader = format.getRecordReader(split, jobConf, new DummyReporter());
            try {
                Writable key = this.getKey(this.serde, recordReader);
                Writable value = this.getValue(this.serde, recordReader);
                // The hook closes the reader when the enclosing future is aborted; it is
                // unregistered when this try block exits.
                try (FutureAborter.AutoCloseableAbortHook aborting = FutureAborter.pushAutoCloseableHook(new Runnable() {
                    @Override
                    public void run() {
                        try {
                            logger.info("Abort reading hive results");
                            recordReader.close();
                        }
                        catch (IOException e) {
                            logger.error("Failed to close the record reader when aborting.", e);
                        }
                    }
                })) {
                    while (recordReader.next(key, value)) {
                        // Check the limit first so a record that will be dropped is not deserialized.
                        if (this.limit != NO_LIMIT && this.totalRecords.get() >= this.limit) {
                            logger.infoV("Exiting Parquet push totalRecords=%d splitRecords=%d limit=%d", new Object[]{this.totalRecords.get(), dssSplitRecords.get(), this.limit});
                            return false;
                        }
                        Object deserialized = this.serdeAccessUtils.deserialize(this.serde, value);
                        Row row = this.rowConverter.buildRow(deserialized, this.rowObjectInspector);
                        row.getRowContext().sourcePartition = eis.getPartition();
                        row.getRowContext().sourceFilepath = eis.getPathWithinProvider();
                        row.getRowContext().sourceFilename = eis.getFilename();
                        row.getRowContext().sourceRecord = fileRecords++;
                        out.emitRow(row);
                        dssSplitRecords.incrementAndGet();
                        this.totalRecords.incrementAndGet();
                        this.listener.setData(0L, 0L, this.totalRecords.get());
                    }
                    logger.infoV("Done processing Hadoop split, dssSplitRecords=%d", new Object[]{dssSplitRecords.get()});
                }
            }
            finally {
                // Also covers failures in getKey/getValue, which run before the read loop.
                recordReader.close();
            }
        }
        logger.infoV("Done processing DSS stream, dssSplitRecords=%d totalRecords=%d", new Object[]{dssSplitRecords.get(), this.totalRecords.get()});
        return true;
    }

    /**
     * Instantiates and initializes the Hive SerDe, the row object inspector and the row
     * converter. Does nothing once {@link #serdeInitialized} is {@code true}.
     *
     * <p>If no schema was set, {@link #inferSchema} is called with the given file.
     *
     * @param cf column factory handed to the row converter
     * @param rf row factory handed to the row converter
     * @param fs file system passed to {@link #inferSchema}
     * @param hdfsPathForSchemaInference file passed to {@link #inferSchema}
     * @throws RuntimeException if the SerDe class cannot be instantiated, is not a Hive
     *         SerDe, or no schema is available
     * @throws Exception on schema inference or SerDe initialization failure
     */
    protected void initSerdeDeserializer(ColumnFactory cf, RowFactory rf, FileSystem fs, Path hdfsPathForSchemaInference) throws Exception {
        if (this.serdeInitialized) {
            return;
        }
        Object serdeObj;
        try {
            serdeObj = Class.forName(this.serdeClass).getDeclaredConstructor().newInstance();
        }
        catch (Exception e) {
            throw new RuntimeException("Unable to load Hive SerDe: " + this.serdeClass, e);
        }
        if (!this.serdeAccessUtils.isSerde(serdeObj)) {
            throw new RuntimeException("Class " + this.serdeClass + " is not a Hive SerDe.");
        }
        this.serde = serdeObj;
        if (this.schema == null) {
            this.schema = this.inferSchema(fs, hdfsPathForSchemaInference);
        }
        if (this.schema == null) {
            throw new RuntimeException("The schema must be manually provided for reading this file. Schema auto-detection was not possible.");
        }
        String colTypes = ObjectInspectorUtils.getFieldTypes((StructObjectInspector)new ObjectInspectorBuilder().buildStructObjectInspector(this.schema.asColumn()));
        String colNames = Joiner.on(",").join(FluentIterable.from(this.schema.getColumns()).transform(new Function<SchemaColumn, String>() {
            @Override
            public String apply(SchemaColumn schemaColumn) {
                // Locale.ROOT keeps the lower-casing independent of the JVM default locale.
                return schemaColumn.getName().toLowerCase(Locale.ROOT);
            }
        }));
        Properties tbl = new Properties();
        tbl.putAll((Map<?, ?>)this.config.getSerdeProperties());
        tbl.setProperty("columns.types", colTypes);
        tbl.setProperty("columns", colNames);
        logger.info("Initialize Hive deserializer " + this.serdeClass);
        logger.info("Table properties : \n" + JSON.pretty(tbl));
        this.serdeAccessUtils.initialize(this.serde, new Configuration(), tbl);
        this.rowObjectInspector = (StructObjectInspector)this.serdeAccessUtils.getObjectInspector(this.serde);
        this.rowConverter = new DSSRowConverter(cf, rf, this.schema.getColumns(), this.config);
        this.serdeInitialized = true;
    }

    /**
     * @return the Hadoop input format used to split and read each file
     */
    public abstract InputFormat<Writable, Writable> createInputFormat();

    /**
     * Supplies the key holder passed to {@link RecordReader#next} for a split.
     *
     * @param serde the initialized SerDe instance
     * @param recordReader reader of the split about to be read
     * @return key object filled in by the reader on each record
     */
    public abstract Writable getKey(Object serde, RecordReader<Writable, Writable> recordReader);

    /**
     * Supplies the value holder passed to {@link RecordReader#next} for a split; the
     * same object is handed to the SerDe for deserialization after each read.
     *
     * @param serde the initialized SerDe instance
     * @param recordReader reader of the split about to be read
     * @return value object filled in by the reader on each record
     */
    public abstract Writable getValue(Object serde, RecordReader<Writable, Writable> recordReader);

    /**
     * Infers the schema from a file when none was set through {@link #setSchema}.
     *
     * @param fs file system holding the file
     * @param path file to inspect
     * @return the inferred schema, or {@code null} if inference is not possible
     * @throws Exception on inference failure
     */
    protected abstract Schema inferSchema(FileSystem fs, Path path) throws Exception;

    /**
     * @return the format configuration given at construction
     */
    protected T getConfig() {
        return this.config;
    }
}

