/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.formats.delta;

import com.dataiku.dip.DKUApp;
import com.dataiku.dip.cluster.Cluster;
import com.dataiku.dip.cluster.ClusterProperty;
import com.dataiku.dip.cluster.ClusterPropertyList;
import com.dataiku.dip.cluster.ClusterSelector;
import com.dataiku.dip.cluster.ClusterSettings;
import com.dataiku.dip.cluster.HadoopSettings;
import com.dataiku.dip.connections.DSSConnection;
import com.dataiku.dip.coremodel.Dataset;
import com.dataiku.dip.coremodel.Schema;
import com.dataiku.dip.coremodel.SchemaColumn;
import com.dataiku.dip.coremodel.SimpleKeyValue;
import com.dataiku.dip.datalayer.ColumnFactory;
import com.dataiku.dip.datalayer.ProcessorOutput;
import com.dataiku.dip.datalayer.Row;
import com.dataiku.dip.datalayer.RowFactory;
import com.dataiku.dip.datasets.SamplingParam;
import com.dataiku.dip.datasets.SchemaUtils;
import com.dataiku.dip.datasets.StreamableDatasetSelection;
import com.dataiku.dip.datasets.Type;
import com.dataiku.dip.datasets.fs.HDFSableDatasetHandler;
import com.dataiku.dip.datasets.sql.AbstractSQLDatasetHandler;
import com.dataiku.dip.formats.delta.DeltaFormat;
import com.dataiku.dip.futures.FutureProgressState;
import com.dataiku.dip.input.InputSplitProgressListener;
import com.dataiku.dip.input.formats.ExtractionLimit;
import com.dataiku.dip.input.row.RowsInputSplitWithSchema;
import com.dataiku.dip.security.AuthCtx;
import com.dataiku.dip.security.impersonation.ImpersonationResolverService;
import com.dataiku.dip.server.SpringUtils;
import com.dataiku.dip.spark.SparkSQLConnectionPoolService;
import com.dataiku.dip.spark.sparksql.DkuDatasetReadPreparedStatement;
import com.dataiku.dip.sql.SparkSQLDialect;
import com.dataiku.dip.utils.DKULogger;
import com.dataiku.dip.utils.ErrorContext;
import com.dataiku.dip.utils.JSON;
import com.dataiku.dip.warnings.WarningsContext;
import com.dataiku.dss.shadelib.org.joda.time.DateTimeZone;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.util.ArrayList;
import java.util.List;
import javax.annotation.Nullable;
import org.apache.commons.lang.StringUtils;

public class DeltaReader {
    /** Shared logger for the splits declared in this class, obtained under the name {@code dku.spark.Read}. */
    private static final DKULogger logger = DKULogger.getLogger("dku.spark.Read");

    /**
     * Split variant that passes {@code false} as the base class's {@code useDatasetSchema} flag,
     * so the base class takes column names and types from the result-set metadata instead of the
     * dataset's declared schema.
     */
    public static class SampleSplit extends GlobalSplitBase {
        /**
         * @param authCtx authentication context forwarded to the base split
         * @param datasetHandler handler of the dataset to read
         */
        public SampleSplit(AuthCtx authCtx, HDFSableDatasetHandler datasetHandler) {
            super(authCtx, datasetHandler, false /* useDatasetSchema */);
        }
    }

    /**
     * Split variant that passes {@code true} as the base class's {@code useDatasetSchema} flag,
     * so the base class uses the dataset's declared schema (when it has one) and checks it
     * against the result-set metadata.
     */
    public static class GlobalSplit extends GlobalSplitBase {
        /**
         * @param authCtx authentication context forwarded to the base split
         * @param datasetHandler handler of the dataset to read
         */
        public GlobalSplit(AuthCtx authCtx, HDFSableDatasetHandler datasetHandler) {
            super(authCtx, datasetHandler, true /* useDatasetSchema */);
        }
    }

    /**
     * Base split that runs a {@link DkuDatasetReadPreparedStatement} for the handler's dataset
     * over a connection taken from {@link SparkSQLConnectionPoolService}, and emits every row of
     * the resulting {@link ResultSet} to a {@link ProcessorOutput}.
     *
     * <p>Depending on {@code useDatasetSchema}, output columns are either taken from the dataset's
     * declared schema (after checking it against the result-set metadata) or derived from the
     * result-set metadata alone.
     */
    public static abstract class GlobalSplitBase
    extends RowsInputSplitWithSchema {
        private final AuthCtx authCtx;
        private final HDFSableDatasetHandler datasetHandler;
        private final String projectKey;
        private final boolean useDatasetSchema;
        /** Schema reported by the read statement; remains {@code null} until {@link #push} has executed it. */
        private Schema schema;

        /**
         * @param authCtx authentication context used for cluster selection, the connection
         *        permission check and the pooled Spark SQL connection
         * @param datasetHandler handler of the dataset to read; its dataset supplies the project key
         * @param useDatasetSchema {@code true} to name and type columns from the dataset schema,
         *        {@code false} to derive them from the result-set metadata
         */
        public GlobalSplitBase(AuthCtx authCtx, HDFSableDatasetHandler datasetHandler, boolean useDatasetSchema) {
            this.authCtx = authCtx;
            this.datasetHandler = datasetHandler;
            this.projectKey = datasetHandler.getDataset().getProjectKey();
            this.useDatasetSchema = useDatasetSchema;
        }

        /**
         * Reads the dataset and emits its rows to {@code out}.
         *
         * @param out receiver of the emitted rows
         * @param cf factory used to declare and look up output columns
         * @param rf factory used to create each output row
         * @param limit optional cap; when non-null with a positive {@code maxRecords}, the read is
         *        sampled to that many rows and the emit loop stops at that count
         * @param listener optional progress listener, updated every 100 rows; may be {@code null}
         * @param warningsContext not used by this implementation
         * @return the number of rows emitted
         * @throws SecurityException if the security check applies and the connection details are
         *         not readable by the authentication context
         * @throws Exception on column count/name mismatch with the dataset schema (raised through
         *         {@code ErrorContext.iae}) or on any failure of the underlying read
         */
        @Override
        public long push(ProcessorOutput out, ColumnFactory cf, RowFactory rf, @Nullable ExtractionLimit limit, InputSplitProgressListener listener, WarningsContext warningsContext) throws Exception {
            // A blank project key selects the built-in cluster for both architectures.
            ClusterSelector clusterSelector = new ClusterSelector();
            boolean hasProject = !StringUtils.isBlank(this.projectKey);
            String clusterId = hasProject ? new ClusterSelector().getClusterForProject(this.projectKey, Cluster.ClusterArchitecture.HADOOP) : "__builtin__";
            String k8sClusterId = hasProject ? new ClusterSelector().getClusterForProject(this.projectKey, Cluster.ClusterArchitecture.KUBERNETES) : "__builtin__";
            ClusterSettings clusterSettings = StringUtils.isNotBlank(clusterId) ? clusterSelector.selectForCluster(this.authCtx, clusterId, k8sClusterId) : clusterSelector.selectGlobal();
            HadoopSettings hadoopSettings = clusterSettings.getHadoopSettings();

            // Copy the "default" Spark configuration into the key/value form the pool expects.
            ClusterPropertyList sparkConf = clusterSettings.getSparkSettings().getByName("default").conf;
            List<SimpleKeyValue> sparkProperties = new ArrayList<>();
            for (ClusterProperty property : sparkConf) {
                sparkProperties.add(new SimpleKeyValue(property.key, property.value));
            }

            // Connection details must be readable when impersonation is enabled or when the
            // "hide connections" property is switched on.
            DSSConnection connection = this.datasetHandler.getHDFSAbleConnection();
            ImpersonationResolverService resolver = (ImpersonationResolverService)SpringUtils.getBean(ImpersonationResolverService.class);
            boolean checkSecurity = resolver.isEnabled() || DKUApp.getProperty("security.hideConnectionsInSingleUserSecurity", "false").equalsIgnoreCase("true");
            if (checkSecurity && !connection.detailsReadableBy(this.authCtx)) {
                throw new SecurityException("You are not allowed to read details of the connection " + connection.name + ", can't read Delta Lake");
            }

            SparkSQLConnectionPoolService connPoolService = (SparkSQLConnectionPoolService)SpringUtils.getBean(SparkSQLConnectionPoolService.class);
            try (SparkSQLConnectionPoolService.SparkSQLConnection conn = connPoolService.take(clusterId, k8sClusterId, this.authCtx, "default", sparkProperties, this.projectKey)) {
                SamplingParam sampling = null;
                if (limit != null && limit.maxRecords > 0L) {
                    sampling = StreamableDatasetSelection.newHEADOnly();
                    sampling.maxRecords = limit.maxRecords;
                    logger.info((Object)("Limit reading to " + limit.maxRecords + " rows"));
                }

                // NOTE(review): neither the statement nor its result set is closed explicitly here;
                // presumably they are released together with the pooled connection — confirm.
                DkuDatasetReadPreparedStatement statement = new DkuDatasetReadPreparedStatement(conn.getLivyConnection(), 2000, this.authCtx, hadoopSettings, this.datasetHandler.getDataset(), sampling, null, null, null);
                statement.execute(null);
                this.schema = statement.getSchema();
                logger.info((Object)("Got schema from Livy: " + JSON.json((Object)this.schema)));

                Dataset dataset = this.datasetHandler.getDataset();
                ResultSet rs = statement.getResultSet();
                ResultSetMetaData rsmd = rs.getMetaData();
                int columnCount = rsmd.getColumnCount();
                logger.info((Object)("Columns in RSMD: " + columnCount));
                SparkSQLDialect dialect = new SparkSQLDialect();

                // The dataset schema is only considered when this split was asked to use it.
                List<?> columns = null;
                if (this.useDatasetSchema && dataset != null && dataset.getSchema() != null) {
                    columns = dataset.getSchema().getColumns();
                }
                logger.info((Object)("Columns from dataset: " + JSON.json((Object)columns)));

                String[] columnNames = new String[columnCount];
                Type[] columnTypes = new Type[columnCount];
                boolean[] columnTimestampNoTzAsDate = new boolean[columnCount];
                if (columns != null) {
                    // The result set must line up with the declared schema, position by position.
                    if (columnCount != columns.size()) {
                        throw ErrorContext.iae("Invalid number of columns in query . Please check dataset schema");
                    }
                    for (int position = 1; position <= columnCount; ++position) {
                        String schemaName = ((SchemaColumn)columns.get(position - 1)).getName();
                        if (!SchemaUtils.getColumnNameToUse(dialect, rsmd, position).equalsIgnoreCase(schemaName)) {
                            throw ErrorContext.iae("Column name not matching between query and schema");
                        }
                    }
                    for (int idx = 0; idx < columnCount; ++idx) {
                        SchemaColumn declared = (SchemaColumn)columns.get(idx);
                        columnNames[idx] = declared.getName();
                        columnTypes[idx] = declared.getType();
                        columnTimestampNoTzAsDate[idx] = declared.timestampNoTzAsDate && !dataset.isManaged();
                    }
                } else {
                    // No usable dataset schema: infer each column from the JDBC metadata, with
                    // temporal read modes taken from the Delta format configuration.
                    DeltaFormat.Config deltaConfig = dataset.getFormatParamsAs(DeltaFormat.Config.class);
                    boolean readAsDate = deltaConfig.readTemporalMode == DeltaFormat.Config.ReadTemporalMode.AS_DATE;
                    AbstractSQLDatasetHandler.ReadTemporalMode dateonlyReadMode = readAsDate ? AbstractSQLDatasetHandler.ReadTemporalMode.AS_DATE : AbstractSQLDatasetHandler.ReadTemporalMode.AS_IS;
                    AbstractSQLDatasetHandler.ReadTemporalMode datetimenotzReadMode = readAsDate ? AbstractSQLDatasetHandler.ReadTemporalMode.AS_STRING : AbstractSQLDatasetHandler.ReadTemporalMode.AS_IS;
                    for (int idx = 0; idx < columnCount; ++idx) {
                        int position = idx + 1;
                        String name = SchemaUtils.getColumnNameToUse(dialect, rsmd, position);
                        SchemaColumn inferred = dialect.fromSQLType(name, rsmd.getColumnType(position), rsmd.getColumnTypeName(position), rsmd.getPrecision(position), rsmd.getScale(position), datetimenotzReadMode, dateonlyReadMode);
                        columnNames[idx] = name;
                        columnTypes[idx] = inferred.getType();
                        columnTimestampNoTzAsDate[idx] = inferred.timestampNoTzAsDate && !dataset.isManaged();
                    }
                }

                // Declare all output columns up front, in result-set order.
                for (String columnName : columnNames) {
                    cf.column(columnName);
                }

                // NOTE(review): the counter is an int, so it would wrap past Integer.MAX_VALUE rows.
                int rowsPushed = 0;
                // The limit is tested before rs.next() so the cursor is not advanced for a row
                // that would be discarded anyway.
                while ((limit == null || limit.maxRecords <= 0L || (long)rowsPushed < limit.maxRecords) && rs.next()) {
                    Row row = rf.row();
                    for (int position = 1; position <= columnCount; ++position) {
                        String columnName = columnNames[position - 1];
                        SchemaColumn schemaColumn = new SchemaColumn(columnName, columnTypes[position - 1]);
                        String value = dialect.getValueAsDSSString(rs, rsmd.getColumnType(position), position, schemaColumn, true, columnTimestampNoTzAsDate[position - 1], DateTimeZone.UTC);
                        row.put(cf.column(columnName), value);
                    }
                    // Every 100 rows: report progress and honour interruption; log every 2000 rows.
                    if (++rowsPushed % 100 == 0) {
                        if (listener != null) {
                            listener.setData(0L, 0L, rowsPushed);
                        }
                        FutureProgressState.checkInterrupt();
                        if (rowsPushed % 2000 == 0) {
                            logger.infoV("Read %d records from DB", new Object[]{rowsPushed});
                        }
                    }
                    out.emitRow(row);
                }
                logger.infoV("Done iterating result set, returned %d rows", new Object[]{rowsPushed});
                return rowsPushed;
            }
        }

        /** @return the fixed description string {@code "SparkRead"} */
        public String getDesc() {
            return "SparkRead";
        }

        /**
         * @return the schema obtained from the read statement during {@link #push}, or
         *         {@code null} if {@code push} has not yet executed the statement
         */
        @Override
        public Schema getSchema() {
            return this.schema;
        }
    }
}

