/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.formats.delta;

import com.dataiku.dip.coremodel.Schema;
import com.dataiku.dip.datalayer.ColumnFactory;
import com.dataiku.dip.datalayer.ProcessorOutput;
import com.dataiku.dip.datalayer.Row;
import com.dataiku.dip.datalayer.RowFactory;
import com.dataiku.dip.datasets.fs.HDFSableDatasetHandler;
import com.dataiku.dip.formats.delta.DeltaFormat;
import com.dataiku.dip.formats.delta.DeltaFormatUtils;
import com.dataiku.dip.formats.delta.DeltaReaderClassLoaderLoader;
import com.dataiku.dip.formats.delta.DeltaReaderUtils;
import com.dataiku.dip.futures.FutureProgressState;
import com.dataiku.dip.input.InputSplitProgressListener;
import com.dataiku.dip.input.formats.ExtractionLimit;
import com.dataiku.dip.input.row.RowsInputSplitWithSchema;
import com.dataiku.dip.partitioning.DimensionValue;
import com.dataiku.dip.partitioning.ExactValueDimensionValue;
import com.dataiku.dip.partitioning.FilePartitioner;
import com.dataiku.dip.partitioning.Partition;
import com.dataiku.dip.partitioning.PartitioningScheme;
import com.dataiku.dip.partitioning.TimeDimension;
import com.dataiku.dip.partitioning.TimeDimensionValue;
import com.dataiku.dip.utils.DKULogger;
import com.dataiku.dip.utils.JSON;
import com.dataiku.dip.warnings.WarningsContext;
import com.google.common.collect.Lists;
import io.delta.kernel.Scan;
import io.delta.kernel.ScanBuilder;
import io.delta.kernel.Snapshot;
import io.delta.kernel.Table;
import io.delta.kernel.data.FilteredColumnarBatch;
import io.delta.kernel.defaults.engine.DefaultEngine;
import io.delta.kernel.engine.Engine;
import io.delta.kernel.expressions.And;
import io.delta.kernel.expressions.Column;
import io.delta.kernel.expressions.Literal;
import io.delta.kernel.expressions.Predicate;
import io.delta.kernel.internal.InternalScanFileUtils;
import io.delta.kernel.internal.data.ScanStateRow;
import io.delta.kernel.internal.util.Utils;
import io.delta.kernel.types.DataType;
import io.delta.kernel.types.DateType;
import io.delta.kernel.types.StructField;
import io.delta.kernel.types.StructType;
import io.delta.kernel.utils.CloseableIterator;
import io.delta.kernel.utils.FileStatus;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import javax.annotation.Nullable;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.net.URLCodec;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;

/**
 * Input split that reads the latest snapshot of a Delta table through the Delta Kernel API
 * and emits its rows to a DSS {@link ProcessorOutput}.
 *
 * <p>When the requested {@link Partition} is neither "all" nor "non partitioned", the split
 * restricts the scan to that partition, either with a kernel-level filter predicate or, when
 * the time dimension cannot be expressed as column predicates, by matching the relative path
 * of each data file against the partition folder.
 */
public class DeltaSplit extends RowsInputSplitWithSchema {
    private final HDFSableDatasetHandler datasetHandler;
    private final boolean useDatasetSchema;
    private final Partition partition;
    /** Schema used for reading; only populated once {@link #pushInternal} has started. */
    private Schema readSchema;
    private static final DKULogger logger = DKULogger.getLogger("dku.delta.read");

    /**
     * @param datasetHandler   handler of the dataset backed by the Delta table
     * @param partition        partition to read
     * @param useDatasetSchema if true, rows are read against the schema declared on the dataset
     *                         (after a compatibility check) instead of the schema converted from
     *                         the Delta table
     */
    public DeltaSplit(HDFSableDatasetHandler datasetHandler, Partition partition, boolean useDatasetSchema) {
        this.datasetHandler = datasetHandler;
        this.partition = partition;
        this.useDatasetSchema = useDatasetSchema;
    }

    /**
     * Reads the split and emits every row to {@code out}.
     *
     * <p>The read runs while a {@link DeltaReaderClassLoaderLoader} is open; it is closed when the
     * read finishes, whether normally or with an exception.
     * NOTE(review): presumably this sets up the class loader needed by the Delta reader - confirm.
     *
     * @param limit           optional cap on the number of rows; ignored when null or when
     *                        {@code maxRecords} is not strictly positive
     * @param listener        optional progress listener
     * @param warningsContext not used by this implementation
     * @return the number of rows emitted
     */
    public long push(ProcessorOutput out, ColumnFactory cf, RowFactory rf, @Nullable ExtractionLimit limit, InputSplitProgressListener listener, WarningsContext warningsContext) throws Exception {
        try (DeltaReaderClassLoaderLoader ignored = new DeltaReaderClassLoaderLoader()) {
            return this.pushInternal(out, cf, rf, limit, listener);
        }
    }

    /**
     * Builds the scan over the latest snapshot of the table, then reads the selected data files
     * one by one until they are exhausted or the extraction limit is reached.
     *
     * @return the number of rows emitted
     */
    private long pushInternal(ProcessorOutput out, ColumnFactory columnFactory, RowFactory rowFactory, @Nullable ExtractionLimit limit, InputSplitProgressListener listener) throws Exception {
        String path = this.datasetHandler.getFullyQualifiedRootPath();
        Configuration conf = new Configuration();
        this.datasetHandler.addExtraConf(conf, true);
        Engine engine = DefaultEngine.create(conf);
        Table table = Table.forPath(engine, path);
        Snapshot snapshot = table.getLatestSnapshot(engine);
        StructType dataSchema = snapshot.getSchema(engine);

        this.readSchema = new Schema();
        this.readSchema.columns.addAll(DeltaFormatUtils.convert(dataSchema, (DeltaFormat.Config)this.datasetHandler.getDataset().getFormatParamsAs(DeltaFormat.Config.class)));
        logger.info("Read with partition " + this.partition.id());

        // Non-null after this section only when a time dimension exists that could NOT be turned
        // into column predicates; it then has to be enforced by filtering on file paths.
        Map.Entry<String, DimensionValue> timeDimension = null;
        Predicate partitionFilter = null;
        if (!(this.partition.isAll() || this.partition.isNP() || this.partition.getDimensionValues().isEmpty())) {
            // Explicit ArrayList: the list is extended below, and Collectors.toList() makes no
            // guarantee about the mutability of what it returns.
            List<Predicate> clauses = this.partition.getDimensionValues().entrySet().stream()
                    .filter(e -> e.getValue() instanceof ExactValueDimensionValue)
                    .map(e -> new Predicate("=", Arrays.asList(new Column(e.getKey()), DeltaFormatUtils.makeTypedLiteral(e, dataSchema))))
                    .collect(Collectors.toCollection(ArrayList::new));
            timeDimension = this.partition.getDimensionValues().entrySet().stream()
                    .filter(e -> e.getValue() instanceof TimeDimensionValue)
                    .findAny()
                    .orElse(null);
            if (timeDimension != null) {
                List<Predicate> timeClauses = this.tryConvertTimeDimensionToClauses(dataSchema, timeDimension);
                if (timeClauses != null) {
                    clauses.addAll(timeClauses);
                    timeDimension = null;
                }
            }
            partitionFilter = DeltaSplit.combineWithAnd(clauses);
        }

        Pattern partitionPathPattern = null;
        if (timeDimension != null) {
            String partitionPath = FilePartitioner.computePartitionRelPathAsFolder(this.partition, this.partition.getScheme());
            while (partitionPath.startsWith("/")) {
                partitionPath = partitionPath.substring(1);
            }
            logger.info("Filtering files directly with " + partitionPath);
            // The partition path is a literal prefix, not a regular expression: quote it so that
            // characters such as '.', '+' or '(' in partition values are matched verbatim.
            partitionPathPattern = Pattern.compile(Pattern.quote(partitionPath) + ".*");
        }

        if (this.useDatasetSchema) {
            Schema datasetSchema = this.datasetHandler.getDataset().getSchema();
            DeltaFormatUtils.checkCompatibility(this.readSchema.columns, datasetSchema.columns, null);
            this.readSchema = datasetSchema;
        }

        ScanBuilder scanBuilder = snapshot.getScanBuilder(engine);
        if (partitionFilter != null) {
            scanBuilder.withFilter(engine, partitionFilter);
        }
        Scan scan = scanBuilder.build();
        Optional<Predicate> remainingFilter = scan.getRemainingFilter();
        io.delta.kernel.data.Row scanState = scan.getScanState(engine);
        StructType physicalReadSchema = ScanStateRow.getPhysicalDataReadSchema(engine, scanState);
        logger.info("Reading with schema " + JSON.json(this.readSchema));
        logger.info("Reading with physical schema " + JSON.json(physicalReadSchema));

        // One cell reader per field of the Delta schema, in field order.
        List<StructField> deltaFields = dataSchema.fields();
        List<DeltaReaderUtils.DeltaToDSSCellReader> columnReaders = new ArrayList<>(deltaFields.size());
        for (int i = 0; i < deltaFields.size(); ++i) {
            StructField field = deltaFields.get(i);
            columnReaders.add(DeltaReaderUtils.buildReader(i, field, columnFactory, this.readSchema.getColumn(field.getName())));
        }

        long rowsPushed = 0L;
        int rowsBefore = 0;
        try (CloseableIterator<FilteredColumnarBatch> filesBatchIter = scan.getScanFiles(engine)) {
            while (filesBatchIter.hasNext() && !DeltaSplit.limitReached(limit, rowsPushed)) {
                FilteredColumnarBatch filesBatch = filesBatchIter.next();
                try (CloseableIterator<io.delta.kernel.data.Row> filesIter = filesBatch.getRows()) {
                    while (filesIter.hasNext() && !DeltaSplit.limitReached(limit, rowsPushed)) {
                        io.delta.kernel.data.Row file = filesIter.next();
                        FileStatus fileStatus = InternalScanFileUtils.getAddFileStatus(file);
                        if (partitionPathPattern != null) {
                            // Compare the file path relative to the table root with the partition folder.
                            String tableRoot = file.getString(filesBatch.getData().getSchema().indexOf("tableRoot"));
                            String filePath = fileStatus.getPath();
                            if (filePath.startsWith(tableRoot)) {
                                filePath = filePath.substring(tableRoot.length());
                            }
                            while (filePath.startsWith("/")) {
                                filePath = filePath.substring(1);
                            }
                            logger.info("Filtering files with prefix on " + filePath + " w.r.t. " + tableRoot);
                            if (!partitionPathPattern.matcher(filePath).matches()) {
                                continue;
                            }
                        }
                        rowsPushed = DeltaSplit.readOneFile(out, rowFactory, limit, listener, engine, fileStatus, physicalReadSchema, remainingFilter, scanState, file, rowsPushed, columnReaders, rowsBefore);
                    }
                }
            }
        }
        logger.infoV("Done iterating result set, returned %d rows", new Object[]{rowsPushed});
        return rowsPushed;
    }

    /**
     * Reads one Parquet data file of the table, converts each row with {@code columnReaders} and
     * emits it to {@code out}, stopping early when the extraction limit is reached.
     *
     * <p>Every 100 rows the listener (if any) is updated and the interruption flag is checked;
     * every 2000 rows a progress line is logged.
     *
     * @param rowsPushed number of rows already emitted before this file
     * @param rowsBefore offset added to the row count reported to {@code listener}
     * @return the updated total number of rows emitted
     */
    private static long readOneFile(ProcessorOutput out, RowFactory rowFactory, @Nullable ExtractionLimit limit, InputSplitProgressListener listener, Engine engine, FileStatus fileStatus, StructType physicalReadSchema, Optional<Predicate> remainingFilter, io.delta.kernel.data.Row scanState, io.delta.kernel.data.Row file, long rowsPushed, List<DeltaReaderUtils.DeltaToDSSCellReader> columnReaders, int rowsBefore) throws Exception {
        try (CloseableIterator<io.delta.kernel.data.ColumnarBatch> physicalDataIter = engine.getParquetHandler().readParquetFiles(Utils.singletonCloseableIterator(fileStatus), physicalReadSchema, remainingFilter);
             CloseableIterator<FilteredColumnarBatch> transformedData = Scan.transformPhysicalData(engine, scanState, file, physicalDataIter)) {
            while (transformedData.hasNext()) {
                if (DeltaSplit.limitReached(limit, rowsPushed)) {
                    return rowsPushed;
                }
                FilteredColumnarBatch logicalData = transformedData.next();
                try (CloseableIterator<io.delta.kernel.data.Row> rowsIter = logicalData.getRows()) {
                    while (rowsIter.hasNext() && !DeltaSplit.limitReached(limit, rowsPushed)) {
                        io.delta.kernel.data.Row deltaRow = rowsIter.next();
                        Row dssRow = rowFactory.row();
                        for (DeltaReaderUtils.DeltaToDSSCellReader columnReader : columnReaders) {
                            columnReader.convert(deltaRow, dssRow);
                        }
                        out.emitRow(dssRow);
                        ++rowsPushed;
                        if (rowsPushed % 100L != 0L) {
                            continue;
                        }
                        if (listener != null) {
                            listener.setData(0L, 0L, (long)rowsBefore + rowsPushed);
                        }
                        FutureProgressState.checkInterrupt();
                        if (rowsPushed % 2000L == 0L) {
                            logger.infoV("Read %d records from Delta", new Object[]{rowsPushed});
                        }
                    }
                }
            }
            return rowsPushed;
        }
    }

    /**
     * Tells whether the extraction limit has been hit. A null limit, or a limit whose
     * {@code maxRecords} is not strictly positive, never stops the read.
     */
    private static boolean limitReached(@Nullable ExtractionLimit limit, long rowsPushed) {
        return limit != null && limit.maxRecords > 0L && rowsPushed >= limit.maxRecords;
    }

    /**
     * Folds the clauses, left to right, into nested {@link And} predicates.
     *
     * @return null for an empty list, the clause itself for a single-element list
     */
    private static Predicate combineWithAnd(List<Predicate> clauses) {
        Predicate combined = null;
        for (Predicate clause : clauses) {
            combined = combined == null ? clause : new And(combined, clause);
        }
        return combined;
    }

    /**
     * Number of per-period equality clauses required to pin down a partition mapped to
     * {@code period}, or -1 for a period other than YEAR, MONTH, DAY or HOUR.
     */
    private static int expectedTimeClauseCount(TimeDimension.Period period) {
        if (period == TimeDimension.Period.YEAR) {
            return 1;
        }
        if (period == TimeDimension.Period.MONTH) {
            return 2;
        }
        if (period == TimeDimension.Period.DAY) {
            return 3;
        }
        if (period == TimeDimension.Period.HOUR) {
            return 4;
        }
        return -1;
    }

    /**
     * Attempts to express the value of a time dimension as predicates on columns of the table.
     *
     * <ul>
     *   <li>If the table has a column named like the dimension: an equality when that column is a
     *       {@link DateType} and the dimension is mapped to days, otherwise a half-open range
     *       between the value and the next period.</li>
     *   <li>Otherwise: one equality per time-period column guessed from the file path pattern of
     *       the partitioning scheme, accepted only if the number of clauses found matches the
     *       period the dimension is mapped to.</li>
     * </ul>
     *
     * @return the predicates, or null when the dimension cannot be converted, in which case the
     *         caller falls back to filtering on file paths
     */
    private List<Predicate> tryConvertTimeDimensionToClauses(StructType dataSchema, Map.Entry<String, DimensionValue> timeDimension) throws DecoderException, UnsupportedEncodingException {
        TimeDimension partitionDimension = (TimeDimension)this.partition.getScheme().getDimension(timeDimension.getKey());
        TimeDimensionValue partitionDimensionValue = (TimeDimensionValue)timeDimension.getValue();

        if (dataSchema.fieldNames().contains(timeDimension.getKey())) {
            List<Predicate> clauses = new ArrayList<>();
            Column column = new Column(timeDimension.getKey());
            DataType dataType = dataSchema.at(dataSchema.indexOf(timeDimension.getKey())).getDataType();
            if (dataType instanceof DateType && partitionDimension.mappedPeriod == TimeDimension.Period.DAY) {
                Literal lit = DeltaFormatUtils.makeSimpleTypedLiteral(dataType, partitionDimensionValue);
                clauses.add(new Predicate("=", Arrays.asList(column, lit)));
            } else {
                Literal low = DeltaFormatUtils.makeSimpleTypedLiteral(dataType, partitionDimensionValue);
                Literal high = DeltaFormatUtils.makeSimpleTypedLiteral(dataType, partitionDimensionValue.nextPeriod());
                Predicate geq = new Predicate(">=", Arrays.asList(column, low));
                Predicate lt = new Predicate("<", Arrays.asList(column, high));
                clauses.add(new And(geq, lt));
            }
            return clauses;
        }

        Map<?, ?> timeNames = FilePartitioner.guessTimePeriodColumnNames(this.partition.getScheme().getFilePathPattern());
        List<Predicate> timeClauses = new ArrayList<>();
        for (TimeDimension.Period p : TimeDimension.Period.values()) {
            String colName = (String)timeNames.get(p);
            String colValue = partitionDimensionValue.getPeriodValue(p);
            logger.info("For " + p + " n=" + colName + " v=" + colValue);
            if (StringUtils.isBlank(colName) || StringUtils.isBlank(colValue)) {
                continue;
            }
            if (colName.indexOf('%') >= 0) {
                // The guessed name still carries percent-escapes: decode them.
                colName = new URLCodec().decode(colName, "utf-8");
                logger.info("Decoded column name to " + colName);
            }
            if (!dataSchema.fieldNames().contains(colName)) {
                logger.warn("Column " + colName + " not found in delta schema");
                continue;
            }
            Column col = new Column(colName);
            Literal lit = DeltaFormatUtils.makeSimpleTypedLiteral(colName, colValue, dataSchema);
            timeClauses.add(new Predicate("=", Arrays.asList(col, lit)));
        }

        if (timeClauses.size() != DeltaSplit.expectedTimeClauseCount(partitionDimension.mappedPeriod)) {
            logger.warn("Unable to convert partition to equality filters");
            return null;
        }
        return timeClauses;
    }

    /** @return a fixed, short description of this split */
    public String getDesc() {
        return "DeltaRead";
    }

    /**
     * @return the dataset schema when this split was created with {@code useDatasetSchema},
     *         otherwise the schema converted from the Delta table, which is null until a read
     *         has started
     */
    public Schema getSchema() {
        return this.useDatasetSchema ? this.datasetHandler.getDataset().getSchema() : this.readSchema;
    }
}

