/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.dataflow.exec.grouping;

import com.dataiku.dip.coremodel.Schema;
import com.dataiku.dip.coremodel.SchemaColumn;
import com.dataiku.dip.dataflow.exec.grouping.GroupingRecipePayloadParams;
import com.dataiku.dip.dataflow.exec.grouping.OneGroup;
import com.dataiku.dip.datalayer.Column;
import com.dataiku.dip.datalayer.ColumnFactory;
import com.dataiku.dip.datalayer.ProcessorOutput;
import com.dataiku.dip.datalayer.Row;
import com.dataiku.dip.datalayer.RowFactory;
import com.dataiku.dip.datalayer.RowInputStream;
import com.dataiku.dip.datalayer.sort.NumberedRow;
import com.dataiku.dip.datalayer.sort.RowAndSortMark;
import com.dataiku.dip.datalayer.sort.RowsComparator;
import com.dataiku.dip.datalayer.sort.SortedRowsIterator;
import com.dataiku.dip.datalayer.sort.Sorter;
import com.dataiku.dip.datalayer.sort.SpilledRowsStorage;
import com.dataiku.dip.datalayer.streamimpl.StreamColumnFactory;
import com.dataiku.dip.datalayer.streamimpl.StreamRowFactory;
import com.dataiku.dip.datalayer.window.AvgAggregation;
import com.dataiku.dip.datalayer.window.ConcatAggregation;
import com.dataiku.dip.datalayer.window.CountAggregation;
import com.dataiku.dip.datalayer.window.CountDistinctAggregation;
import com.dataiku.dip.datalayer.window.CountUniqueAggregation;
import com.dataiku.dip.datalayer.window.FirstOrLastOrderedByColumnAggregation;
import com.dataiku.dip.datalayer.window.GlobalCountAggregation;
import com.dataiku.dip.datalayer.window.MinOrMaxAggregation;
import com.dataiku.dip.datalayer.window.StddevAggregation;
import com.dataiku.dip.datalayer.window.SumAggregation;
import com.dataiku.dip.datalayer.window.WindowAggregation;
import com.dataiku.dip.datasets.SchemaUtils;
import com.dataiku.dip.utils.ErrorContext;
import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;

/**
 * Sort-based grouping: rows are pushed into a {@link Sorter} keyed on the grouping columns
 * (backed by a {@link SpilledRowsStorage}), then read back in sorted order so that each run of
 * rows sharing the same key values can be folded into one output row by a list of
 * {@link WindowAggregation}s.
 *
 * <p>The expected call sequence is the one performed by
 * {@link #compute(RowInputStream, ProcessorOutput, ColumnFactory, ColumnFactory, Schema, RowFactory)}:
 * {@code startAccumulating} → {@code accumulate}* → {@code finishAccumulating} →
 * {@code startAggregating} → {@code aggregate} → {@code finishAggregating}.
 *
 * <p>{@link #close()} closes the spilled-rows storage, if one was created.
 */
public class Grouper2 implements AutoCloseable {
    private static final Logger logger = Logger.getLogger("dip.grouper");

    /** A partition spanning more than this many row indexes gets its own log line. */
    private static final long LARGE_PARTITION_LOG_THRESHOLD = 100000L;
    /** When no large-partition line is logged, a progress line is logged every this many groups. */
    private static final long GROUP_COUNT_LOG_INTERVAL = 1000L;

    private static final String MISSING_AGGREGATED_COLUMN_MESSAGE = "Using aggregation on column \"%s\", which does not exist in input schema or in computed columns";
    private static final String MISSING_ORDER_COLUMN_MESSAGE = "Using the order column \"%s\", which does not exist in input schema or in computed columns";

    private final File folder;
    private final List<GroupingRecipePayloadParams.GroupingKey> keysDesc;
    private final List<GroupingRecipePayloadParams.GroupingValue> valuesDesc;
    private final String globalCountColumnName;
    private final Sorter.MergeSortParams mergeSortParams;
    private final Integer identifierMaxLength;
    private final boolean schemalessSuffixing;

    private SpilledRowsStorage storage = null;
    private Sorter sorter = null;
    private List<Sorter.SortSpec> sorterSpecs = Lists.newArrayList();
    private ColumnFactory sorterOutputCf = new StreamColumnFactory();
    // long, not int: one increment per accumulated row, and an int would wrap on very large inputs
    // (wrapping back to 0 would make compute() skip the aggregation phase entirely).
    private long totalRowCount = 0L;
    private List<WindowAggregation> aggregations = null;
    private Comparator<NumberedRow> partitionComparator = null;
    private List<Sorter.SortSpec> groupSpecs = Lists.newArrayList();

    /**
     * Creates a grouper that suffixes output column names with the schema-aware suffixer
     * (equivalent to passing {@code schemalessSuffixing = false}).
     */
    public Grouper2(List<GroupingRecipePayloadParams.GroupingKey> keysDesc, List<GroupingRecipePayloadParams.GroupingValue> valuesDesc, String globalCountColumnName, File folder, Sorter.MergeSortParams mergeSortParams, Integer identifierMaxLength) {
        this(keysDesc, valuesDesc, globalCountColumnName, folder, mergeSortParams, identifierMaxLength, false);
    }

    /**
     * @param keysDesc grouping keys; each key column becomes a sort column and an output column
     * @param valuesDesc aggregated values and the aggregations requested on each
     * @param globalCountColumnName name of the per-group row count column; no such column is produced when blank
     * @param folder passed to the {@link SpilledRowsStorage}
     * @param mergeSortParams passed to the {@link SpilledRowsStorage} and the {@link Sorter}
     * @param identifierMaxLength passed to the column identifier suffixer
     * @param schemalessSuffixing when true, aggregation output names are built with
     *        {@code SchemalessSafeColumnIdentifierSuffixer} instead of the schema-aware suffixer
     */
    public Grouper2(List<GroupingRecipePayloadParams.GroupingKey> keysDesc, List<GroupingRecipePayloadParams.GroupingValue> valuesDesc, String globalCountColumnName, File folder, Sorter.MergeSortParams mergeSortParams, Integer identifierMaxLength, boolean schemalessSuffixing) {
        this.keysDesc = keysDesc;
        this.valuesDesc = valuesDesc;
        this.globalCountColumnName = globalCountColumnName;
        this.folder = folder;
        this.mergeSortParams = mergeSortParams;
        this.identifierMaxLength = identifierMaxLength;
        this.schemalessSuffixing = schemalessSuffixing;
    }

    /** Static factory equivalent to the 7-argument constructor with {@code schemalessSuffixing = true}. */
    public static Grouper2 withSchemalessSuffixing(List<GroupingRecipePayloadParams.GroupingKey> keysDesc, List<GroupingRecipePayloadParams.GroupingValue> valuesDesc, String globalCountColumnName, File folder, Sorter.MergeSortParams mergeSortParams, Integer identifierMaxLength) {
        return new Grouper2(keysDesc, valuesDesc, globalCountColumnName, folder, mergeSortParams, identifierMaxLength, true);
    }

    /** Forwards the cancellation to the sorter; does nothing if accumulation has not started. */
    public void cancel() throws Exception {
        if (this.sorter != null) {
            this.sorter.cancel();
        }
    }

    /** Closes the spilled-rows storage; does nothing if accumulation has not started. */
    @Override
    public void close() throws IOException {
        if (this.storage != null) {
            this.storage.close();
        }
    }

    /**
     * Prepares the sorter and its spilled-rows storage.
     *
     * @param inputCf column factory of the incoming rows
     * @param inputSchema schema of the incoming rows
     * @throws IOException declared for the storage / sorter set-up
     */
    public void startAccumulating(ColumnFactory inputCf, Schema inputSchema) throws IOException {
        // Rebuild the spec list instead of appending to the field, so that a second call
        // does not end up with every grouping key listed twice.
        List<Sorter.SortSpec> specs = Lists.newArrayList();
        for (GroupingRecipePayloadParams.GroupingKey key : this.keysDesc) {
            specs.add(new Sorter.SortSpec(key.column, true));
        }
        this.sorterSpecs = specs;
        this.sorterOutputCf = new StreamColumnFactory();
        RowFactory sorterOutputRf = new StreamRowFactory();

        List<Column> spilledColumns = this.selectSpilledColumns(inputCf, inputSchema);
        List<String> spilledColumnNames = spilledColumns.stream().map(Column::getName).collect(Collectors.toList());
        logger.info("Spilling columns : " + Joiner.on(", ").join(spilledColumnNames));

        this.storage = new SpilledRowsStorage(this.folder, spilledColumns, this.mergeSortParams);
        this.sorter = new Sorter(this.sorterSpecs, inputSchema, inputCf, this.storage, sorterOutputRf, this.sorterOutputCf, this.mergeSortParams);
    }

    /**
     * Chooses the input columns handed to the spilled-rows storage: every column of the schema as
     * soon as one value carries a condition, otherwise only the key columns, the aggregated
     * columns and those order columns that exist in the input schema.
     */
    private List<Column> selectSpilledColumns(ColumnFactory inputCf, Schema inputSchema) {
        List<Column> spilledColumns = new ArrayList<>();
        boolean needsAllColumns = this.valuesDesc.stream().anyMatch(value -> value.condition != null);
        if (needsAllColumns) {
            logger.info("Grouping definition doesn't explicitely define needed columns, spilling them all");
            spilledColumns.addAll(SpilledRowsStorage.factoryColumnsOfSchema(inputCf, inputSchema));
            return spilledColumns;
        }
        HashSet<String> columnNames = Sets.newHashSet();
        for (GroupingRecipePayloadParams.GroupingKey key : this.keysDesc) {
            columnNames.add(key.column);
        }
        for (GroupingRecipePayloadParams.GroupingValue value : this.valuesDesc) {
            if (!value.hasAnyAggr()) {
                continue;
            }
            columnNames.add(value.column);
            if (value.orderColumn != null && inputSchema.getColumn(value.orderColumn) != null) {
                columnNames.add(value.orderColumn);
            }
        }
        for (String columnName : columnNames) {
            spilledColumns.add(inputCf.column(columnName));
        }
        return spilledColumns;
    }

    /** Hands one input row to the sorter and counts it. */
    public void accumulate(Row row) throws Exception {
        this.sorter.emitRow(row);
        ++this.totalRowCount;
    }

    /** Signals the end of input to the sorter and marks the storage as fully written. */
    public void finishAccumulating() throws Exception {
        this.sorter.lastRowEmitted();
        this.storage.doneWriting();
    }

    /**
     * Builds the partition comparator and the aggregation list used by {@link #aggregate}.
     *
     * @param outputCf column factory of the produced rows
     * @param inputSchema schema of the rows that were accumulated
     */
    public void startAggregating(ColumnFactory outputCf, Schema inputSchema) {
        // Fresh list on every call (see startAccumulating): copies of the sort specs, bound to
        // the columns of the sorter's output column factory.
        List<Sorter.SortSpec> specs = Lists.newArrayList();
        for (Sorter.SortSpec sortSpec : this.sorterSpecs) {
            specs.add(new Sorter.SortSpec(sortSpec));
        }
        for (Sorter.SortSpec groupSpec : specs) {
            groupSpec.factoryColumn = this.sorterOutputCf.column(groupSpec.column);
        }
        this.groupSpecs = specs;
        this.partitionComparator = new RowsComparator(this.groupSpecs, RowsComparator.NullsOrdering.AUTO);
        for (GroupingRecipePayloadParams.GroupingValue valueDesc : this.valuesDesc) {
            if (valueDesc.condition != null) {
                valueDesc.condition.setColumnFactory(this.sorterOutputCf);
            }
        }
        this.aggregations = this.buildAggregationList(outputCf, inputSchema, this.sorterOutputCf);
        logger.info("compute " + this.aggregations.size() + " for grouping");
    }

    /**
     * Walks the sorted rows and emits one output row per group.
     *
     * <p>Two iterators are opened on the sorter: the head one looks for the boundaries of each
     * group, the tail one is rewound to the first row of the group and replays its rows into the
     * aggregations.
     *
     * @param out receives one row per group
     * @param outputCf column factory used to address the key columns of the produced row
     * @param outputRf creates the produced rows
     */
    public void aggregate(ProcessorOutput out, ColumnFactory outputCf, RowFactory outputRf) throws Exception {
        logger.info("Opening 2 iterators for the head and tail of groups");
        SortedRowsIterator headPosition = this.sorter.read();
        SortedRowsIterator tailPosition = this.sorter.read();
        GroupPartition groupPartition = this.advancePartition(headPosition, headPosition.next(), this.partitionComparator);
        long groupCount = 0L;
        while (groupPartition.first != null && groupPartition.last != null) {
            ++groupCount;
            if (groupPartition.last.mark.index - groupPartition.first.mark.index > LARGE_PARTITION_LOG_THRESHOLD) {
                logger.info("Aggregate on partition [" + groupPartition.first.mark.index + ", " + groupPartition.last.mark.index + "]");
            } else if (groupCount % GROUP_COUNT_LOG_INTERVAL == 0L) {
                logger.info("Aggregated " + groupPartition.last.mark.index + " rows into " + groupCount + " partitions");
            }
            Row row = outputRf.row();
            // Key values are copied from the first row of the group.
            for (Sorter.SortSpec key : this.groupSpecs) {
                row.put(outputCf.column(key.column), groupPartition.first.row.row.get(key.factoryColumn));
            }
            this.computePartition(tailPosition, groupPartition, row, this.aggregations);
            out.emitRow(row);
            groupPartition = this.advancePartition(headPosition, groupPartition.next, this.partitionComparator);
        }
        logger.info("Sorted rows write stats : sorter : " + this.sorter.stats());
        logger.info("Sorted rows read stats : position : " + headPosition.stats());
    }

    /** Currently a no-op; kept as the closing step of the aggregation phase. */
    public void finishAggregating() {
    }

    /**
     * Runs the whole grouping: accumulates every row of {@code is}, then aggregates and emits the
     * groups to {@code out}. Nothing is emitted when the input stream yields no row.
     */
    public void compute(RowInputStream is, ProcessorOutput out, ColumnFactory inputCf, ColumnFactory outputCf, Schema inputSchema, RowFactory outputRf) throws Exception {
        this.startAccumulating(inputCf, inputSchema);
        for (Row row = is.next(); row != null; row = is.next()) {
            this.accumulate(row);
        }
        this.finishAccumulating();
        logger.info("Sort for grouping sort, now iterating");
        if (this.totalRowCount == 0L) {
            return;
        }
        this.startAggregating(outputCf, inputSchema);
        this.aggregate(out, outputCf, outputRf);
        this.finishAggregating();
    }

    /** Returns the value descriptors for which {@code hasAnyAggr()} is true, in declaration order. */
    private List<GroupingRecipePayloadParams.GroupingValue> aggregatedValues() {
        return this.valuesDesc.stream().filter(GroupingRecipePayloadParams.GroupingValue::hasAnyAggr).collect(Collectors.toList());
    }

    /** Looks up the order column in the input schema, failing with the standard message when absent. */
    private static SchemaColumn requireOrderSchemaColumn(Schema inputSchema, String orderColumnName) {
        return Optional.ofNullable(inputSchema.getColumn(orderColumnName)).orElseThrow(() -> ErrorContext.iaef(MISSING_ORDER_COLUMN_MESSAGE, (Object) orderColumnName, new Object[0]));
    }

    /**
     * Instantiates one {@link WindowAggregation} per requested aggregation, reading from the
     * sorter output columns and writing to suffixed columns of {@code outputCf}. The global count
     * aggregation, when requested, comes last.
     *
     * <p>Statement order is deliberately kept stable: column factories and the suffixer are
     * queried in the same sequence for every run.
     */
    private List<WindowAggregation> buildAggregationList(ColumnFactory outputCf, Schema inputSchema, ColumnFactory sorterOutputCf) {
        List<WindowAggregation> aggregationList = Lists.newArrayList();
        List<GroupingRecipePayloadParams.GroupingValue> usedValues = this.aggregatedValues();
        SchemaUtils.SafeColumnIdentifierSuffixer safeSuffixer = this.schemalessSuffixing ? new SchemaUtils.SchemalessSafeColumnIdentifierSuffixer(this.identifierMaxLength) : new SchemaUtils.SafeColumnIdentifierSuffixer(this.identifierMaxLength, inputSchema);
        for (GroupingRecipePayloadParams.GroupingValue valueDesc : usedValues) {
            Column aggregatedColumn = sorterOutputCf.column(valueDesc.column);
            SchemaColumn aggregatedSchemaColumn = inputSchema.getColumn(valueDesc.column);
            if (aggregatedSchemaColumn == null) {
                throw ErrorContext.iaef(MISSING_AGGREGATED_COLUMN_MESSAGE, (Object) valueDesc.column, new Object[0]);
            }
            if (valueDesc.count) {
                String name = safeSuffixer.addSuffix(valueDesc.column, "_count");
                aggregationList.add(new CountAggregation(aggregatedColumn, outputCf.column(name), aggregatedSchemaColumn));
            }
            if (valueDesc.countDistinct) {
                String name = safeSuffixer.addSuffix(valueDesc.column, "_distinct");
                aggregationList.add(new CountDistinctAggregation(aggregatedColumn, outputCf.column(name), aggregatedSchemaColumn));
            }
            if (valueDesc.sum || valueDesc.sum2) {
                String name = safeSuffixer.addSuffix(valueDesc.column, "_sum");
                aggregationList.add(new SumAggregation(aggregatedColumn, outputCf.column(name), aggregatedSchemaColumn));
            }
            if (valueDesc.concat) {
                String name = safeSuffixer.addSuffix(valueDesc.column, "_concat");
                aggregationList.add(new ConcatAggregation(aggregatedColumn, outputCf.column(name), aggregatedSchemaColumn, valueDesc.concatSeparator, valueDesc.concatDistinct));
            }
            if (valueDesc.avg) {
                String name = safeSuffixer.addSuffix(valueDesc.column, "_avg");
                aggregationList.add(new AvgAggregation(aggregatedColumn, outputCf.column(name), aggregatedSchemaColumn));
            }
            if (valueDesc.median) {
                throw new RuntimeException("Median aggregation is not implemented for DSS Engine");
            }
            if (valueDesc.stddev) {
                String name = safeSuffixer.addSuffix(valueDesc.column, "_stddev");
                aggregationList.add(new StddevAggregation(aggregatedColumn, outputCf.column(name), aggregatedSchemaColumn));
            }
            if (valueDesc.max) {
                String name = safeSuffixer.addSuffix(valueDesc.column, "_max");
                aggregationList.add(new MinOrMaxAggregation(aggregatedColumn, outputCf.column(name), aggregatedSchemaColumn, false));
            }
            if (valueDesc.min) {
                String name = safeSuffixer.addSuffix(valueDesc.column, "_min");
                aggregationList.add(new MinOrMaxAggregation(aggregatedColumn, outputCf.column(name), aggregatedSchemaColumn, true));
            }
            if (valueDesc.first) {
                Column orderColumn = sorterOutputCf.column(valueDesc.orderColumn);
                SchemaColumn orderSchemaColumn = requireOrderSchemaColumn(inputSchema, valueDesc.orderColumn);
                String name = safeSuffixer.addSuffix(valueDesc.column, "_first");
                aggregationList.add(new FirstOrLastOrderedByColumnAggregation(aggregatedColumn, outputCf.column(name), aggregatedSchemaColumn, orderColumn, true, orderSchemaColumn, valueDesc.condition, valueDesc.firstLastNotNull));
            }
            if (valueDesc.last) {
                Column orderColumn = sorterOutputCf.column(valueDesc.orderColumn);
                SchemaColumn orderSchemaColumn = requireOrderSchemaColumn(inputSchema, valueDesc.orderColumn);
                String name = safeSuffixer.addSuffix(valueDesc.column, "_last");
                aggregationList.add(new FirstOrLastOrderedByColumnAggregation(aggregatedColumn, outputCf.column(name), aggregatedSchemaColumn, orderColumn, false, orderSchemaColumn, valueDesc.condition, valueDesc.firstLastNotNull));
            }
            if (valueDesc.countUnique) {
                String name = safeSuffixer.addSuffix(valueDesc.column, "_unique");
                aggregationList.add(new CountUniqueAggregation(aggregatedColumn, outputCf.column(name), aggregatedSchemaColumn));
            }
        }
        if (StringUtils.isNotBlank(this.globalCountColumnName)) {
            String name = safeSuffixer.addSuffix(this.globalCountColumnName, "");
            aggregationList.add(new GlobalCountAggregation(outputCf.column(name)));
        }
        return aggregationList;
    }

    /**
     * Computes the schema of the rows produced by the grouping: the key columns (as found in
     * {@code inputSchema}), then one column per requested aggregation, then the global count
     * column when its name is not blank.
     *
     * <p>NOTE(review): this method always uses the schema-aware suffixer, even when the instance
     * was created with schemaless suffixing, and it silently ignores {@code median} (which
     * {@code buildAggregationList} rejects) — confirm both are intended.
     *
     * @param inputSchema schema of the grouped input
     * @return the output schema
     * @throws Exception if an aggregated column is missing from {@code inputSchema}
     */
    public Schema getOutputSchema(Schema inputSchema) throws Exception {
        Schema outputSchema = new Schema();
        SchemaUtils.SafeColumnIdentifierSuffixer safeSuffixer = new SchemaUtils.SafeColumnIdentifierSuffixer(this.identifierMaxLength, inputSchema);
        for (GroupingRecipePayloadParams.GroupingKey groupingKey : this.keysDesc) {
            outputSchema.addColumn(inputSchema.getColumn(groupingKey.column));
        }
        for (GroupingRecipePayloadParams.GroupingValue valueDesc : this.aggregatedValues()) {
            SchemaColumn aggregatedSchemaColumn = inputSchema.getColumn(valueDesc.column);
            if (aggregatedSchemaColumn == null) {
                throw ErrorContext.iaef(MISSING_AGGREGATED_COLUMN_MESSAGE, (Object) valueDesc.column, new Object[0]);
            }
            if (valueDesc.count) {
                String name = safeSuffixer.addSuffix(valueDesc.column, "_count");
                outputSchema.addColumn(CountAggregation.buildOutputSchemaColumn(name, aggregatedSchemaColumn));
            }
            if (valueDesc.countDistinct) {
                String name = safeSuffixer.addSuffix(valueDesc.column, "_distinct");
                outputSchema.addColumn(CountDistinctAggregation.buildOutputSchemaColumn(name, aggregatedSchemaColumn));
            }
            // Same trigger as buildAggregationList (sum || sum2): the "_sum" column is produced
            // at run time in both cases, so it must be declared in both cases.
            if (valueDesc.sum || valueDesc.sum2) {
                String name = safeSuffixer.addSuffix(valueDesc.column, "_sum");
                outputSchema.addColumn(SumAggregation.buildOutputSchemaColumn(name, aggregatedSchemaColumn));
            }
            if (valueDesc.concat) {
                String name = safeSuffixer.addSuffix(valueDesc.column, "_concat");
                outputSchema.addColumn(ConcatAggregation.buildOutputSchemaColumn(name, aggregatedSchemaColumn));
            }
            if (valueDesc.avg) {
                String name = safeSuffixer.addSuffix(valueDesc.column, "_avg");
                outputSchema.addColumn(AvgAggregation.buildOutputSchemaColumn(name, aggregatedSchemaColumn));
            }
            if (valueDesc.stddev) {
                String name = safeSuffixer.addSuffix(valueDesc.column, "_stddev");
                outputSchema.addColumn(StddevAggregation.buildOutputSchemaColumn(name, aggregatedSchemaColumn));
            }
            if (valueDesc.max) {
                String name = safeSuffixer.addSuffix(valueDesc.column, "_max");
                outputSchema.addColumn(MinOrMaxAggregation.buildOutputSchemaColumn(name, aggregatedSchemaColumn));
            }
            if (valueDesc.min) {
                String name = safeSuffixer.addSuffix(valueDesc.column, "_min");
                outputSchema.addColumn(MinOrMaxAggregation.buildOutputSchemaColumn(name, aggregatedSchemaColumn));
            }
            if (valueDesc.first) {
                String name = safeSuffixer.addSuffix(valueDesc.column, "_first");
                outputSchema.addColumn(FirstOrLastOrderedByColumnAggregation.buildOutputSchemaColumn(name, aggregatedSchemaColumn));
            }
            if (valueDesc.last) {
                String name = safeSuffixer.addSuffix(valueDesc.column, "_last");
                outputSchema.addColumn(FirstOrLastOrderedByColumnAggregation.buildOutputSchemaColumn(name, aggregatedSchemaColumn));
            }
            if (valueDesc.countUnique) {
                String name = safeSuffixer.addSuffix(valueDesc.column, "_unique");
                outputSchema.addColumn(CountUniqueAggregation.buildOutputSchemaColumn(name, aggregatedSchemaColumn));
            }
        }
        if (StringUtils.isNotBlank(this.globalCountColumnName)) {
            String name = safeSuffixer.addSuffix(this.globalCountColumnName, "");
            outputSchema.addColumn(GlobalCountAggregation.buildOutputSchemaColumn(name));
        }
        return outputSchema;
    }

    /**
     * Consumes rows from {@code partitionPosition} while they compare equal to {@code first}.
     *
     * @param partitionPosition iterator positioned just after {@code first}
     * @param first first row of the partition; may be null once the input is exhausted
     * @param partitionComparator compares rows on the grouping keys
     * @return a partition whose {@code last} is the last row equal to {@code first} and whose
     *         {@code next} is the first differing row, or null when the iterator ran out
     */
    private GroupPartition advancePartition(SortedRowsIterator partitionPosition, RowAndSortMark first, Comparator<NumberedRow> partitionComparator) throws IOException {
        GroupPartition partition = new GroupPartition();
        partition.first = first;
        partition.last = first;
        partition.next = null;
        while (partitionPosition.hasNext()) {
            RowAndSortMark candidate = partitionPosition.next();
            if (partitionComparator.compare(first.row, candidate.row) != 0) {
                // First row of the following group: remember it, it has already been consumed.
                partition.next = candidate;
                break;
            }
            partition.last = candidate;
        }
        return partition;
    }

    /**
     * Replays the rows of one partition through a fresh {@link OneGroup} and writes the
     * aggregated values into {@code row}.
     */
    private void computePartition(SortedRowsIterator groupPosition, GroupPartition partitionPosition, Row row, List<WindowAggregation> aggregations) throws Exception {
        OneGroup window = new OneGroup(aggregations);
        groupPosition.reset(partitionPosition.first);
        // One next() per row index in [first, last], both bounds included.
        for (long i = partitionPosition.first.mark.index; i <= partitionPosition.last.mark.index; ++i) {
            RowAndSortMark rowAndMark = groupPosition.next();
            window.expand(rowAndMark.row.row);
        }
        window.produceValue(row);
    }

    /** Boundaries of one group in the sorted rows, plus the already-read first row of the next group. */
    static class GroupPartition {
        RowAndSortMark first;
        RowAndSortMark last;
        RowAndSortMark next;

        GroupPartition() {
        }
    }
}

