/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.dataflow.exec.geojoin;

import com.dataiku.dip.coremodel.Dataset;
import com.dataiku.dip.coremodel.Schema;
import com.dataiku.dip.coremodel.SchemaColumn;
import com.dataiku.dip.dataflow.exec.AbstractStagedThreadedBuiltinRunner;
import com.dataiku.dip.dataflow.exec.geojoin.DatasetAndSelection;
import com.dataiku.dip.dataflow.exec.geojoin.geotools.RowInputStreamDataStore;
import com.dataiku.dip.dataflow.utils.FlowJobUtils;
import com.dataiku.dip.datalayer.Column;
import com.dataiku.dip.datalayer.ColumnFactory;
import com.dataiku.dip.datalayer.ProcessorOutput;
import com.dataiku.dip.datalayer.Row;
import com.dataiku.dip.datalayer.RowFactory;
import com.dataiku.dip.datalayer.RowInputStream;
import com.dataiku.dip.datalayer.sort.NumberedRow;
import com.dataiku.dip.datalayer.sort.SimpleRowsFileReader;
import com.dataiku.dip.datalayer.sort.Sorter;
import com.dataiku.dip.datalayer.sort.SpilledRowsStorage;
import com.dataiku.dip.datalayer.streamimpl.StreamColumn;
import com.dataiku.dip.datalayer.streamimpl.StreamColumnFactory;
import com.dataiku.dip.datalayer.streamimpl.StreamRowFactory;
import com.dataiku.dip.datasets.UniversalSingleThreadPuller;
import com.dataiku.dip.datasets.UniversalSingleThreadPusher;
import com.dataiku.dip.security.AuthCtx;
import com.dataiku.dip.util.AutoDelete;
import com.dataiku.dip.utils.DKULogger;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Callable;
import org.geotools.data.DataUtilities;
import org.geotools.data.collection.SpatialIndexFeatureCollection;
import org.geotools.data.simple.SimpleFeatureCollection;
import org.geotools.data.simple.SimpleFeatureSource;
import org.geotools.data.store.ContentFeatureCollection;
import org.geotools.feature.FeatureCollection;
import org.opengis.feature.simple.SimpleFeature;

public class SpillableIndexableDataSource
implements AutoCloseable {
    private static final DKULogger logger = DKULogger.getLogger((String)"dku.recipes.geojoin.spill");
    /** Dataset (and row selection) this data source reads from. */
    final DatasetAndSelection datasetAndSelection;
    /** Column factory shared by the pulled stream, the spill storage and the spill readers. */
    private final StreamColumnFactory cf = new StreamColumnFactory();
    /** Row factory shared by the spill readers and the column-extraction code. */
    private final StreamRowFactory rf = new StreamRowFactory();
    /** Reader over the spill storage; used by {@link #getRowByNumber(long)} for random access. */
    public SimpleRowsFileReader reader;
    /** Position of each spilled row in the spill storage, keyed by row number. */
    public Map<Long, Long> spilledRowsPositionsByRowNumber = new HashMap<Long, Long>();
    /** On-disk storage receiving the spilled rows; created by {@code spillAndStream}. */
    public SpilledRowsStorage storage;
    /** Columns exposed to the spatial index; set by {@code spillAndIndex}. */
    public Set<String> indexedColumns;
    /** Columns written to the spill storage; set by {@code spillAndStream}. */
    public Set<String> spilledColumns;
    /** Cache of rows read back from the spill storage, keyed by row number. */
    final DKULRUCache<Long, Row> spilledRowsCache = new DKULRUCache<>(2000);
    /** Temporary folder backing {@link #storage}. */
    private AutoDelete tmpFolder;
    /** Spatially indexed feature source; {@code null} until {@code index} has run. */
    private SimpleFeatureSource indexedDataSource;
    /** Features of the indexed data store by id; {@code null} until {@code index} has run. */
    private Map<Long, SimpleFeature> featuresById;

    /**
     * Creates a data source over the given dataset and selection.
     * Nothing is read or spilled until one of the {@code stream}/{@code spill*} methods is called.
     *
     * @param datasetAndSelection dataset to read, together with the selection applied to it
     */
    public SpillableIndexableDataSource(DatasetAndSelection datasetAndSelection) {
        this.datasetAndSelection = datasetAndSelection;
    }

    /**
     * Builds a schema restricted to the given column names.
     *
     * <p>Columns are added in the iteration order of {@code selectedColumns}; each one is
     * looked up by name in {@code schema}.
     *
     * @param selectedColumns names of the columns to keep, or {@code null}
     * @param schema schema the columns are looked up in
     * @return a new schema holding the selected columns, or {@code null} when
     *         {@code selectedColumns} is {@code null}
     */
    public static Schema extractSchema(Set<String> selectedColumns, Schema schema) {
        if (selectedColumns != null) {
            Schema projected = new Schema();
            selectedColumns.forEach(columnName -> projected.addColumn(schema.getColumn(columnName)));
            return projected;
        }
        return null;
    }

    /**
     * Opens a row stream over the dataset and selection of this data source.
     *
     * <p>Without a distinct filter, the rows are pulled directly. With a distinct filter,
     * the dataset is pushed from a background thread through a
     * {@code KeepDistinctRowsProcessorOutput} that feeds the returned stream.
     *
     * @param authCtx authorization context used to read the dataset
     * @return the stream of rows to consume
     * @throws Exception if the stream cannot be set up
     */
    public RowInputStream stream(AuthCtx authCtx) throws Exception {
        if (!this.datasetAndSelection.selection.filter.distinct) {
            // Only open the direct pull when it is actually the stream handed back to the caller;
            // opening it in the distinct case would leave an unused stream behind.
            return UniversalSingleThreadPuller.pull(authCtx, this.datasetAndSelection.dataset, this.datasetAndSelection.selection, (ColumnFactory)this.cf);
        }
        UniversalSingleThreadPuller.Stream stream = new UniversalSingleThreadPuller.Stream();
        AbstractStagedThreadedBuiltinRunner.KeepDistinctRowsProcessorOutput distinctOutput = new AbstractStagedThreadedBuiltinRunner.KeepDistinctRowsProcessorOutput(stream, this.datasetAndSelection.dataset.getSchema(), (RowFactory)this.rf, (ColumnFactory)this.cf, (ColumnFactory)this.cf, (File)FlowJobUtils.getTmpFolder("geojoin-sorter", "geojoin"), new Sorter.MergeSortParams());
        new Thread(() -> {
            try {
                UniversalSingleThreadPusher.push(authCtx, this.datasetAndSelection.dataset, this.datasetAndSelection.selection, (ProcessorOutput)distinctOutput, (ColumnFactory)this.cf, (RowFactory)this.rf);
                distinctOutput.lastRowEmitted();
            }
            catch (Exception e) {
                // NOTE(review): the failure is only logged; confirm that a consumer of the
                // returned stream cannot block forever when lastRowEmitted() is never reached.
                logger.warnV((Throwable)e, "Failed to apply distinct filter on dataset: %s", new Object[]{this.datasetAndSelection.dataset.getFullName()});
            }
        }).start();
        return stream;
    }

    /**
     * Looks up a feature of the indexed data store by its id.
     *
     * @param id feature id
     * @return the matching feature, or {@code null} if the map has no entry for {@code id}
     * @throws IllegalStateException if the index has not been built yet
     */
    public SimpleFeature getFeatureById(Long id) {
        Map<Long, SimpleFeature> features = this.featuresById;
        if (features != null) {
            return features.get(id);
        }
        throw new IllegalStateException("Indexed feature store wasn't initialized");
    }

    /**
     * Opens the source stream and prepares the spill storage for it.
     *
     * <p>This records {@code spilledColumns} on the data source, creates the temporary
     * folder and the spill storage restricted to those columns, and opens the reader used
     * for later random access. Rows are spilled as the returned stream is consumed.
     *
     * @param spilledColumns names of the columns to write to the spill storage
     * @param authCtx authorization context used to read the dataset
     * @return a stream over the source rows that spills them while being read
     * @throws Exception if the source stream or the spill storage cannot be set up
     */
    public SpilledRowInputStream spillAndStream(Set<String> spilledColumns, AuthCtx authCtx) throws Exception {
        this.spilledColumns = spilledColumns;
        // The source stream is opened first, before any temporary storage is created.
        RowInputStream sourceRows = this.stream(authCtx);

        Dataset dataset = this.datasetAndSelection.dataset;
        this.tmpFolder = FlowJobUtils.getTmpFolder("spillable-indexed-data-source", dataset.getFullName());

        Schema spilledSchema = SpillableIndexableDataSource.extractSchema(spilledColumns, dataset.getSchema());
        this.storage = new SpilledRowsStorage(
                (File)this.tmpFolder,
                SpilledRowsStorage.factoryColumnsOfSchema((ColumnFactory)this.cf, spilledSchema),
                new Sorter.MergeSortParams());
        this.reader = this.storage.newReader((RowFactory)this.rf, (ColumnFactory)this.cf);

        return new SpilledRowInputStream(sourceRows, spilledColumns);
    }

    /**
     * Spills the dataset and builds the spatial index over it.
     *
     * <p>The spill storage receives the union of {@code indexedColumns} and
     * {@code spilledColumns}; only {@code indexedColumns} are exposed to the index.
     *
     * @param indexedColumns names of the columns made available to the spatial index
     * @param spilledColumns names of the additional columns to keep in the spill storage
     * @param authCtx authorization context used to read the dataset
     * @throws Exception if streaming, spilling or indexing fails
     */
    public void spillAndIndex(Set<String> indexedColumns, Set<String> spilledColumns, AuthCtx authCtx) throws Exception {
        this.indexedColumns = indexedColumns;
        Set<String> allStoredColumns = Sets.newHashSet(Iterables.concat(indexedColumns, spilledColumns));
        this.index(this.spillAndStream(allStoredColumns, authCtx));
    }

    /**
     * Builds the spatially indexed feature source from a spilling stream.
     *
     * <p>The stream is wrapped in a {@code RowInputStreamDataStore} exposing the indexed
     * columns (no columns when {@link #indexedColumns} is {@code null}). Its features are
     * loaded into a {@code SpatialIndexFeatureCollection}, and the store's id-to-feature map
     * is kept for {@link #getFeatureById(Long)}.
     *
     * @param spilledStream stream of rows to index
     * @throws IOException if the features cannot be read or indexed
     */
    void index(SpilledRowInputStream spilledStream) throws IOException {
        Schema indexedSchema = SpillableIndexableDataSource.extractSchema(this.indexedColumns, this.datasetAndSelection.dataset.getSchema());
        RowInputStreamDataStore featureStore = new RowInputStreamDataStore(
                indexedSchema == null ? new ArrayList<SchemaColumn>() : indexedSchema.getColumns(),
                (ColumnFactory)this.cf,
                spilledStream);
        ContentFeatureCollection storeFeatures = featureStore.getFeatureSource().getFeatures();
        SpatialIndexFeatureCollection spatialIndex = new SpatialIndexFeatureCollection((SimpleFeatureCollection)storeFeatures);
        this.indexedDataSource = DataUtilities.source((FeatureCollection)spatialIndex);
        // Read the id map only after the index has been built from the store's features.
        this.featuresById = featureStore.featuresById;
    }

    /**
     * Opens a new, non-spilling stream that re-reads the rows from the spill storage.
     *
     * <p>The stream's column set is the union of {@link #indexedColumns} and
     * {@link #spilledColumns}. Either may be {@code null} (for instance when only
     * {@code spillAndStream} was used, so no indexed columns were set) and is then
     * treated as empty.
     *
     * @return a stream over the spilled rows
     * @throws IOException if the spill storage cannot be opened for reading
     * @throws IllegalStateException if the spill storage has not been created yet
     */
    public SpilledRowInputStream getInputStreamFromSpill() throws IOException {
        if (this.storage == null) {
            throw new IllegalStateException("Spill storage wasn't initialized");
        }
        // Build the column set before opening the reader so a failure cannot leave a reader open.
        Set<String> columns = Sets.newHashSet();
        if (this.indexedColumns != null) {
            columns.addAll(this.indexedColumns);
        }
        if (this.spilledColumns != null) {
            columns.addAll(this.spilledColumns);
        }
        return new SpilledRowInputStream(this.storage.newReader((RowFactory)this.rf, (ColumnFactory)this.cf), columns, false);
    }

    /**
     * Returns the spatially indexed feature source.
     *
     * @return the indexed source, or {@code null} (after logging a warning) when the index
     *         has not been built
     */
    public SimpleFeatureSource getIndexedDataSource() {
        SimpleFeatureSource source = this.indexedDataSource;
        if (source == null) {
            logger.warn((Object)"Accessing data source that hasn't been built");
        }
        return source;
    }

    /**
     * Reads a spilled row back by its row number, going through the row cache.
     *
     * <p>On a cache miss the shared {@link #reader} is moved to the recorded position of
     * the row and the row is read from there.
     *
     * @param num row number, as assigned when the row was spilled
     * @return the spilled row
     * @throws IOException declared for callers; failures while reading are rethrown by the
     *         cache wrapped in a {@link RuntimeException}
     */
    public Row getRowByNumber(long num) throws IOException {
        return this.spilledRowsCache.getOrMiss(num, () -> {
            Long position = this.spilledRowsPositionsByRowNumber.get(num);
            if (position == null) {
                // Fail with an explicit message instead of an NPE from auto-unboxing.
                throw new IllegalArgumentException("No spilled row recorded for row number " + num);
            }
            this.reader.position(position.longValue());
            return this.reader.next();
        });
    }

    /**
     * Closes the spill storage and the spill reader, if they were created.
     *
     * <p>Each resource is closed independently, so a failure on one does not prevent the
     * other from being closed. Failures are logged and never propagated.
     */
    // NOTE(review): tmpFolder itself is not closed here; confirm that closing the storage
    // is what removes the temporary folder.
    @Override
    public void close() {
        if (this.storage != null) {
            try {
                logger.infoV("Deleting temporary storage: %s", new Object[]{this.tmpFolder.getAbsolutePath()});
                this.storage.close();
            }
            catch (Exception e) {
                logger.warnV((Throwable)e, "Failed to close a data source for : %s", new Object[]{this.datasetAndSelection.dataset.getFullName()});
            }
        }
        if (this.reader != null) {
            try {
                this.reader.close();
            }
            catch (Exception e) {
                logger.warnV((Throwable)e, "Failed to close a data source for : %s", new Object[]{this.datasetAndSelection.dataset.getFullName()});
            }
        }
    }

    /**
     * Bounded cache with least-recently-used eviction and hit/miss counters.
     *
     * <p>Holds at most {@code size} entries; inserting beyond that evicts the entry that was
     * accessed least recently. This class performs no synchronization.
     *
     * @param <K> key type
     * @param <V> value type
     */
    public static class DKULRUCache<K, V> {
        private final int maxSize;
        private final LinkedHashMap<K, V> cache;
        /** Number of lookups served from the cache. */
        public int cacheHit;
        /** Number of lookups that had to invoke the loader. */
        public int cacheMiss;

        /**
         * @param size maximum number of entries kept in the cache
         */
        public DKULRUCache(int size) {
            this.maxSize = size;
            // accessOrder=true keeps entries ordered from least to most recently accessed,
            // so the "eldest" entry handed to removeEldestEntry is the LRU one.
            this.cache = new LinkedHashMap<K, V>(16, 0.75f, true){

                @Override
                protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
                    // Called after the insertion: evict only once the limit is exceeded.
                    return this.size() > DKULRUCache.this.maxSize;
                }
            };
        }

        /**
         * Tells whether a key is currently cached, without marking it as recently used.
         *
         * @param key key to test
         * @return {@code true} if the cache holds an entry for {@code key}
         */
        public boolean containsKey(K key) {
            return this.cache.containsKey(key);
        }

        /**
         * Returns the cached value for a key, loading and caching it on a miss.
         *
         * @param key key to look up
         * @param cacheMissRead loader invoked when the key is not cached; its result
         *        (including {@code null}) is stored in the cache
         * @return the cached or freshly loaded value
         * @throws RuntimeException wrapping any exception thrown by {@code cacheMissRead}
         */
        public V getOrMiss(K key, Callable<V> cacheMissRead) {
            // containsKey + get (rather than a null check) so that cached null values count as hits.
            if (this.cache.containsKey(key)) {
                ++this.cacheHit;
                return this.cache.get(key);
            }
            ++this.cacheMiss;
            try {
                V val = cacheMissRead.call();
                this.cache.put(key, val);
                return val;
            }
            catch (Exception e) {
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Row stream that numbers the rows of an underlying stream and, in spillable mode,
     * writes the selected columns of each row to the enclosing data source's spill storage.
     *
     * <p>In spillable mode, rows are buffered and flushed to the storage in batches of
     * {@link #SPILL_EVERY_N_ROWS}, and the position of every spilled row is recorded in
     * {@code spilledRowsPositionsByRowNumber}. In non-spillable mode the rows are only
     * numbered and nothing is buffered or written.
     */
    public class SpilledRowInputStream {
        /** Number of buffered rows that triggers a flush to the spill storage. */
        public static final int SPILL_EVERY_N_ROWS = 1000;
        private final RowInputStream initialStream;
        private final Set<String> spilledColumns;
        /** Rows waiting to be flushed; only filled in spillable mode. */
        private final List<NumberedRow> rowsToSpill = new ArrayList<NumberedRow>();
        private final boolean isSpillable;
        /** Count of rows numbered so far; also the number given to the next row. */
        private int spilledRowsCnt = 0;

        /**
         * Creates a spillable stream.
         *
         * @param initialStream stream providing the rows
         * @param spilledColumns names of the columns to spill
         */
        public SpilledRowInputStream(RowInputStream initialStream, Set<String> spilledColumns) {
            this(initialStream, spilledColumns, true);
        }

        /**
         * @param initialStream stream providing the rows
         * @param spilledColumns names of the columns to spill; when {@code null} or empty,
         *        rows are neither numbered nor spilled
         * @param isSpillable whether rows read from this stream are written to the spill storage
         */
        public SpilledRowInputStream(RowInputStream initialStream, Set<String> spilledColumns, boolean isSpillable) {
            this.initialStream = initialStream;
            this.spilledColumns = spilledColumns;
            this.isSpillable = isSpillable;
        }

        /**
         * @return the full name of the dataset of the enclosing data source
         */
        public String getFullDatasetName() {
            return SpillableIndexableDataSource.this.datasetAndSelection.dataset.getFullName();
        }

        /**
         * Reads the next row of the underlying stream.
         *
         * <p>In spillable mode this also buffers the row for spilling, flushes the buffer
         * when it is full or when the stream ends, and marks the storage as done once the
         * stream ends.
         *
         * @return the next row with its row number ({@code -1} when there are no spilled
         *         columns), or {@code null} at the end of the stream
         * @throws Exception if reading the underlying stream or writing the spill fails
         */
        public NumberedRow next() throws Exception {
            String dsName = SpillableIndexableDataSource.this.datasetAndSelection.dataset.getName();
            Row next = this.initialStream.next();
            int rowIdx = -1;
            if (next != null && this.spilledColumns != null && !this.spilledColumns.isEmpty()) {
                rowIdx = this.spilledRowsCnt++;
                // Only buffer when the rows will actually be flushed. In non-spillable mode the
                // buffer is never drained, so filling it would retain every row in memory.
                if (this.isSpillable) {
                    this.rowsToSpill.add(new NumberedRow(this.extractColumns(next, this.spilledColumns), rowIdx));
                }
            }
            if (!this.isSpillable) {
                return next == null ? null : new NumberedRow(next, rowIdx);
            }
            boolean endOfStream = next == null;
            if (!this.rowsToSpill.isEmpty() && (endOfStream || this.rowsToSpill.size() >= SPILL_EVERY_N_ROWS)) {
                this.flushPendingRows(dsName);
            }
            if (endOfStream) {
                SpillableIndexableDataSource.this.storage.doneWriting();
                logger.infoV("Done Spilling dataset %s. Total rows %d", new Object[]{dsName, this.spilledRowsCnt});
                return null;
            }
            return new NumberedRow(next, rowIdx);
        }

        /** Writes the buffered rows to the spill storage, records their positions and empties the buffer. */
        private void flushPendingRows(String dsName) throws Exception {
            logger.infoV("Spilling %d rows of dataset %s to %s", new Object[]{this.rowsToSpill.size(), dsName, SpillableIndexableDataSource.this.tmpFolder.getAbsolutePath()});
            SpillableIndexableDataSource.this.spilledRowsPositionsByRowNumber.putAll(SpillableIndexableDataSource.this.storage.spillWithPositions(this.rowsToSpill).rowsPositions);
            this.rowsToSpill.clear();
        }

        /** Copies the named columns of a row into a new row created by the shared row factory. */
        private Row extractColumns(Row next, Set<String> extractedColumnNames) {
            Row filteredRow = SpillableIndexableDataSource.this.rf.row();
            for (String storedColumnName : extractedColumnNames) {
                StreamColumn column = SpillableIndexableDataSource.this.cf.column(storedColumnName);
                filteredRow.put((Column)column, next.get((Column)column));
            }
            return filteredRow;
        }
    }
}

