/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.input.formats.parquet;

import com.dataiku.dip.coremodel.Dataset;
import com.dataiku.dip.coremodel.Schema;
import com.dataiku.dip.coremodel.SchemaValidator;
import com.dataiku.dip.datalayer.ColumnFactory;
import com.dataiku.dip.datalayer.Row;
import com.dataiku.dip.datasets.fs.HDFSProvider;
import com.dataiku.dip.exceptions.DKUSecurityException;
import com.dataiku.dip.fs.FSEnumerationResult;
import com.dataiku.dip.fs.FSEnumerationSettings;
import com.dataiku.dip.input.formats.parquet.DSSParquetOutputFormat;
import com.dataiku.dip.input.formats.parquet.ParquetFormatConfig;
import com.dataiku.dip.input.formats.parquet.ParquetRowBuilder;
import com.dataiku.dip.input.formats.parquet.ReusableParquetRow;
import com.dataiku.dip.output.FileNamingAbleOutputWriter;
import com.dataiku.dip.output.OptionallyExtensible;
import com.dataiku.dip.output.OutputWriter;
import com.dataiku.dip.partitioning.FilePartitioner;
import com.dataiku.dip.partitioning.Partition;
import com.dataiku.dip.util.HadoopUtils;
import com.dataiku.dip.utils.DKULogger;
import com.dataiku.dip.utils.Pair;
import com.dataiku.dip.utils.PathUtils;
import com.dataiku.dip.warnings.WarningsContext;
import com.google.common.collect.Lists;
import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.parquet.hadoop.ParquetOutputFormat;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;

/**
 * {@link OutputWriter} that writes the rows of one dataset partition to Parquet
 * through a {@link DSSParquetOutputFormat} and its Hadoop output committer.
 *
 * <p>Lifecycle, as implemented below: the constructor builds a
 * {@link UserGroupInformation} from the provider; {@link #init(ColumnFactory)}
 * creates the target directory, picks an output base name and opens the Hadoop
 * record writer; {@link #emitRow(Row)} converts and writes rows one at a time;
 * {@link #lastRowEmitted()} / {@link #cancel()} close the writer and commit the
 * task and the job. All Hadoop calls that open, close or inspect the writer run
 * inside {@code ugi.doAs(...)}.
 *
 * <p>Implementation note: the explicit up-casts on logger, warning and committer
 * calls are intentionally kept so that exactly the same overloads are selected
 * as before.
 */
public class ParquetOutputWriter
        extends OutputWriter
        implements FileNamingAbleOutputWriter, OptionallyExtensible<ParquetOutputWriter> {
    private static final DKULogger logger = DKULogger.getLogger(ParquetOutputWriter.class);

    /** Hadoop configuration key receiving the chosen output base name. */
    private static final String OUTPUT_BASENAME_KEY = "mapreduce.output.basename";
    /** Default base name used when the writer is extensible. */
    private static final String DEFAULT_CHUNK_BASENAME = "part";
    /** A progress line is logged every time this many rows have been processed. */
    private static final int PROGRESS_LOG_INTERVAL = 100000;

    private final ParquetFormatConfig config;
    private final WarningsContext warningsContext;
    /** Single row instance, cleared and refilled for every emitted row. */
    private final ReusableParquetRow parquetRow = new ReusableParquetRow();
    private final HDFSProvider provider;
    private final Dataset dataset;
    private final Partition targetPartition;
    private final int targetSplit;
    private final UserGroupInformation ugi;

    // Set by init(), reset to null by closeOnce().
    private RecordWriter<Void, ReusableParquetRow> recordWriter;
    private DSSParquetOutputFormat outputFormat;
    private TaskAttemptContext tac;
    private ParquetRowBuilder parquetRowBuilder;

    // long rather than int so that very large outputs cannot overflow the counters.
    private long rowCount = 0;
    private long failedCount = 0;

    private FileNamingAbleOutputWriter.FileNamingMode fileNamingMode = FileNamingAbleOutputWriter.FileNamingMode.EXTENT;
    private String fileNamingFixedId;
    private boolean extensible = false;

    /**
     * Stores the collaborators and creates the UGI used for all later Hadoop calls.
     *
     * @param provider        provider used for paths, directory creation, enumeration,
     *                        Hadoop configuration and UGI creation
     * @param dataset         dataset whose schema and partitioning schema are used
     * @param targetPartition partition to write
     * @param targetSplit     split number passed to the task attempt context
     * @param warningsContext receives a warning for each row that cannot be converted
     * @param config          Parquet settings (block size, compression, identifiers case, flavor)
     * @throws DKUSecurityException declared for {@code provider.makeUGI()}
     * @throws IOException          declared for {@code provider.makeUGI()}
     */
    public ParquetOutputWriter(HDFSProvider provider, Dataset dataset, Partition targetPartition, int targetSplit, WarningsContext warningsContext, ParquetFormatConfig config) throws DKUSecurityException, IOException {
        this.provider = provider;
        this.dataset = dataset;
        this.targetPartition = targetPartition;
        this.targetSplit = targetSplit;
        this.warningsContext = warningsContext;
        this.config = config;
        logger.info((Object)"Creating UGI");
        this.ugi = provider.makeUGI();
    }

    /**
     * Marks this writer as extensible: when no fixed or timestamp naming applies,
     * {@link #init(ColumnFactory)} then picks a base name ("part", "part0", "part1", ...)
     * for which no file is found yet.
     *
     * @return this writer
     */
    public ParquetOutputWriter withExtensible() {
        this.extensible = true;
        return this;
    }

    /**
     * Validates the dataset schema, creates the partition directory, chooses the
     * output base name and opens the Parquet record writer.
     *
     * @param cf column factory handed to the output format and the row builder
     * @throws RuntimeException if the dataset schema is null or has no column
     * @throws Exception        anything raised by schema validation, the provider or Hadoop
     */
    public void init(final ColumnFactory cf) throws Exception {
        final Schema outputSchema = this.dataset.getSchema();
        // Reject a missing schema before handing it to the validator, so that the
        // explicit "not defined" error is reported rather than whatever the
        // validator does with null.
        if (outputSchema == null) {
            throw this.undefinedSchemaError();
        }
        new SchemaValidator().validate(outputSchema);
        if (outputSchema.getColumns().size() == 0) {
            throw this.undefinedSchemaError();
        }

        String relPath = FilePartitioner.getRelPath(this.targetPartition, this.dataset.getPartitioningSchema());
        final Path outputPath = new Path(this.provider.getPath(relPath));
        this.provider.mkdirs(relPath);

        final String baseName = this.resolveBaseName(relPath);

        this.ugi.doAs(new PrivilegedExceptionAction<Void>() {

            @Override
            public Void run() throws IOException, InterruptedException {
                ParquetOutputWriter.this.openRecordWriter(outputSchema, cf, outputPath, baseName);
                return null;
            }
        });
    }

    /** Builds the error thrown when the output dataset has no usable schema. */
    private RuntimeException undefinedSchemaError() {
        return new RuntimeException("The schema of the output dataset " + this.dataset.getFullName() + " has not been defined. Unable to write the Parquet file.");
    }

    /**
     * Chooses the output base name according to the naming mode.
     *
     * @param relPath relative path of the target partition inside the dataset
     * @return the fixed id in FIXED mode when it is not blank; otherwise {@code null}
     *         in TIMESTAMP mode or when the writer is not extensible; otherwise the
     *         first of "part", "part0", "part1", ... for which
     *         {@link #someFileExists(String)} reports no file
     */
    private String resolveBaseName(String relPath) {
        if (this.fileNamingMode == FileNamingAbleOutputWriter.FileNamingMode.FIXED && StringUtils.isNotBlank(this.fileNamingFixedId)) {
            return this.fileNamingFixedId;
        }
        if (this.fileNamingMode == FileNamingAbleOutputWriter.FileNamingMode.TIMESTAMP || !this.extensible) {
            return null;
        }
        String candidate = DEFAULT_CHUNK_BASENAME;
        int extentId = 0;
        while (this.someFileExists(PathUtils.concatLNT(new String[]{relPath, candidate}))) {
            candidate = DEFAULT_CHUNK_BASENAME + extentId;
            ++extentId;
        }
        return candidate;
    }

    /**
     * Configures the Hadoop job and opens the record writer. Called from inside
     * {@code ugi.doAs(...)} by {@link #init(ColumnFactory)}.
     *
     * @param outputSchema schema of the rows to write
     * @param cf           column factory handed to the output format and the row builder
     * @param outputPath   output directory set on the job
     * @param baseName     output base name; not set on the configuration when blank
     * @throws IOException if the configured compression has no matching
     *                     {@link CompressionCodecName}, or on Hadoop I/O failure
     */
    private void openRecordWriter(Schema outputSchema, ColumnFactory cf, Path outputPath, String baseName) throws IOException, InterruptedException {
        Configuration conf = this.provider.setupHadoopConf(false);
        if (StringUtils.isNotBlank(baseName)) {
            conf.set(OUTPUT_BASENAME_KEY, baseName);
        }
        Job job = new Job(conf);
        FileOutputFormat.setOutputPath(job, outputPath);
        // NOTE(review): the product is narrowed to int; depending on the type of
        // parquetBlockSizeMB, large values could overflow - confirm.
        ParquetOutputFormat.setBlockSize(job, (int)(this.config.parquetBlockSizeMB * 1024 * 1024));

        String compressionName = this.config.parquetCompressionMethod.name();
        try {
            ParquetOutputFormat.setCompression(job, CompressionCodecName.valueOf(compressionName));
        }
        catch (IllegalArgumentException exc) {
            // Keep the original exception as cause so the failing lookup stays visible.
            throw new IOException("Format '" + compressionName + "' is not supported by your Hadoop distribution.", exc);
        }

        this.tac = HadoopUtils.createTaskattemptContext(job, this.targetSplit);
        boolean lowerCaseIdentifiers = this.config.parquetLowerCaseIdentifiers;
        ParquetFormatConfig.Flavor flavor = this.config.parquetFlavor;
        this.outputFormat = new DSSParquetOutputFormat(outputSchema, cf, this.warningsContext, lowerCaseIdentifiers, flavor);
        this.outputFormat.getOutputCommitter(this.tac).setupJob((JobContext)this.tac);
        this.outputFormat.getOutputCommitter(this.tac).setupTask(this.tac);
        this.recordWriter = this.outputFormat.getRecordWriter(this.tac);
        this.parquetRowBuilder = new ParquetRowBuilder(outputSchema, cf, lowerCaseIdentifiers, flavor);
    }

    /**
     * Tells whether the provider finds at least one file for the given extent.
     *
     * <p>The extent is split into a parent path and a base name; the parent is
     * enumerated recursively with settings built from the base name (each "-"
     * replaced by "\-") followed by {@code [_-].*}.
     * NOTE(review): the argument presumably is interpreted as a regular expression
     * by {@code FSEnumerationSettings.filesPrefixedBy} - confirm.
     *
     * @param extent path whose last component is the base name to look for
     * @return true if the enumeration is non-null, successful and yields at least one path
     */
    private boolean someFileExists(String extent) {
        Pair parentAndBaseName = PathUtils.splitBasename(extent);
        String parent = (String)parentAndBaseName.first;
        String base = (String)parentAndBaseName.second;
        logger.info((Object)("Check if file exists with prefix " + parent + " / " + base));
        FSEnumerationSettings enumerationSettings = FSEnumerationSettings.filesPrefixedBy(base.replace("-", "\\-") + "[_-].*");
        FSEnumerationResult enumeration = this.provider.enumerateRecursive(parent, enumerationSettings);
        return enumeration != null
                && enumeration.isSuccessful()
                && enumeration.getPaths() != null
                && enumeration.getPaths().iterator().hasNext();
    }

    /**
     * @return always -1; this writer does not track the number of bytes written
     */
    public long writtenBytes() throws IOException {
        return -1L;
    }

    /**
     * Converts one row and writes it.
     *
     * <p>A conversion failure is reported to the warnings context and counted as
     * failed; the row is then skipped. A failure of the underlying record writer
     * is not caught and propagates to the caller.
     *
     * @param row row to write
     * @throws Exception if the record writer fails
     */
    public void emitRow(Row row) throws Exception {
        this.parquetRow.clear();
        boolean ok = false;
        try {
            this.parquetRowBuilder.build(row, this.parquetRow);
            ok = true;
        }
        catch (Exception e) {
            this.warningsContext.addWarning(WarningsContext.WarningType.OUTPUT_DATA_BAD_TYPE, "Unable to write row " + (this.rowCount + this.failedCount) + " to Parquet: " + e.getMessage(), (Throwable)e, logger);
        }
        if (ok) {
            // The Parquet record writer is keyed by Void: only the value is written.
            this.recordWriter.write(null, this.parquetRow);
            ++this.rowCount;
        } else {
            ++this.failedCount;
        }
        if ((this.rowCount + this.failedCount) % PROGRESS_LOG_INTERVAL == 0) {
            logger.info((Object)("Processed " + this.rowCount + " rows (" + this.failedCount + " failed)"));
        }
    }

    /**
     * Closes the record writer and commits task and job, inside {@code ugi.doAs(...)}.
     * Fields are reset to null after success, so a second call after a successful
     * one does nothing.
     */
    private void closeOnce() throws IOException, InterruptedException {
        this.ugi.doAs(new PrivilegedExceptionAction<Void>() {

            @Override
            public Void run() throws IOException, InterruptedException {
                ParquetOutputWriter.this.closeAndCommit();
                return null;
            }
        });
    }

    /**
     * Closes the record writer if still open, then commits the task and the job
     * if the output format is still set.
     */
    private void closeAndCommit() throws IOException, InterruptedException {
        if (this.recordWriter != null) {
            this.recordWriter.close(this.tac);
            this.recordWriter = null;
        }
        if (this.outputFormat != null) {
            this.outputFormat.getOutputCommitter(this.tac).commitTask(this.tac);
            this.outputFormat.getOutputCommitter(this.tac).commitJob((JobContext)this.tac);
            this.outputFormat = null;
        }
    }

    /**
     * Logs the final row counts, then closes the writer and commits the output.
     */
    public void lastRowEmitted() throws Exception {
        logger.info((Object)("Processed " + this.rowCount + " rows (" + this.failedCount + " failed)"));
        this.closeOnce();
    }

    /**
     * Closes the writer through the same path as {@link #lastRowEmitted()}.
     * NOTE(review): this commits the task and the job rather than aborting them -
     * confirm this is intended for a cancelled write.
     */
    public void cancel() throws Exception {
        this.closeOnce();
    }

    /**
     * Sets how output files are named; takes effect at {@link #init(ColumnFactory)}.
     *
     * @param mode    naming mode
     * @param fixedId base name used in FIXED mode when not blank
     */
    public void setFileNamingMode(FileNamingAbleOutputWriter.FileNamingMode mode, String fixedId) {
        this.fileNamingMode = mode;
        this.fileNamingFixedId = fixedId;
    }

    /**
     * Lists the committer's job-attempt, task-attempt and committed-task paths,
     * expressed relative to the provider root.
     *
     * <p>Returns an empty list when the output format is not set or its committer
     * is not a {@link FileOutputCommitter}. Each path is resolved independently:
     * a failure on one is logged as a warning and the others are still attempted.
     *
     * @return the paths that could be resolved, possibly empty, never null
     */
    public List<String> gatherWritesInProgress() throws IOException, InterruptedException {
        return this.ugi.doAs(new PrivilegedExceptionAction<List<String>>() {

            @Override
            public List<String> run() throws IOException, InterruptedException {
                List<String> ret = new ArrayList<String>();
                if (ParquetOutputWriter.this.outputFormat == null) {
                    return ret;
                }
                OutputCommitter committer = ParquetOutputWriter.this.outputFormat.getOutputCommitter(ParquetOutputWriter.this.tac);
                if (!(committer instanceof FileOutputCommitter)) {
                    return ret;
                }
                FileOutputCommitter fileCommitter = (FileOutputCommitter)committer;
                try {
                    Path jobPath = fileCommitter.getJobAttemptPath((JobContext)ParquetOutputWriter.this.tac);
                    ret.add(ParquetOutputWriter.this.pathToRelPathInDataset(jobPath));
                }
                catch (Exception e) {
                    logger.warn((Object)("Unable to get job temp path: " + e.getMessage()));
                }
                try {
                    Path taskPath = fileCommitter.getTaskAttemptPath(ParquetOutputWriter.this.tac);
                    ret.add(ParquetOutputWriter.this.pathToRelPathInDataset(taskPath));
                }
                catch (Exception e) {
                    logger.warn((Object)("Unable to get task temp path: " + e.getMessage()));
                }
                try {
                    Path committedTaskPath = fileCommitter.getCommittedTaskPath(ParquetOutputWriter.this.tac);
                    ret.add(ParquetOutputWriter.this.pathToRelPathInDataset(committedTaskPath));
                }
                catch (Exception e) {
                    logger.warn((Object)("Unable to get commited task temp path: " + e.getMessage()));
                }
                return ret;
            }
        });
    }

    /**
     * Converts a Hadoop path to a path relative to the provider root: scheme and
     * authority are stripped, the result is normalized with
     * {@code PathUtils.makeLeadingNoTrailing}, and the first
     * {@code provider.getRoot().length()} characters are dropped.
     * NOTE(review): there is no check that the path actually starts with the
     * provider root - confirm callers only pass paths below it.
     *
     * @param jobPath path to convert
     * @return the remainder of the normalized path after the root-length prefix
     */
    private String pathToRelPathInDataset(Path jobPath) {
        String withoutSchemeAndAuthority = Path.getPathWithoutSchemeAndAuthority(jobPath).toUri().toString();
        String jobPathWithinAuthority = PathUtils.makeLeadingNoTrailing(withoutSchemeAndAuthority);
        return jobPathWithinAuthority.substring(this.provider.getRoot().length());
    }
}

