/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.recipes.code.hive;

import com.dataiku.dip.coremodel.Dataset;
import com.dataiku.dip.coremodel.InfoMessage;
import com.dataiku.dip.coremodel.Schema;
import com.dataiku.dip.coremodel.SchemaColumn;
import com.dataiku.dip.coremodel.SerializedDataset;
import com.dataiku.dip.coremodel.SerializedRecipe;
import com.dataiku.dip.dao.DatasetsDAO;
import com.dataiku.dip.dataflow.RecipeRunnableSubgraph;
import com.dataiku.dip.dataflow.graph.FlowDataset;
import com.dataiku.dip.datasets.DatasetInspector;
import com.dataiku.dip.datasets.SamplingParam;
import com.dataiku.dip.datasets.fs.HDFSDatasetHandler;
import com.dataiku.dip.exceptions.CodedIOException;
import com.dataiku.dip.expressions.Expression;
import com.dataiku.dip.expressions.GrelToQueryMapping;
import com.dataiku.dip.expressions.GrelToQueryTranslator;
import com.dataiku.dip.expressions.GrelTranslator;
import com.dataiku.dip.hadoop.HDFSPathUtils;
import com.dataiku.dip.hive.HiveSchemaHandler;
import com.dataiku.dip.input.DatasetHandlerFactory;
import com.dataiku.dip.input.formats.csv.CSVFormatConfig;
import com.dataiku.dip.input.formats.csv.CSVFormatExtractor;
import com.dataiku.dip.input.formats.parquet.ParquetFormatConfig;
import com.dataiku.dip.input.formats.parquet.ParquetFormatMeta;
import com.dataiku.dip.output.Output;
import com.dataiku.dip.partitioning.DimensionValue;
import com.dataiku.dip.partitioning.FilePartitioner;
import com.dataiku.dip.partitioning.Partition;
import com.dataiku.dip.partitioning.PartitioningScheme;
import com.dataiku.dip.recipes.consistency.RecipeCodes;
import com.dataiku.dip.security.AuthCtx;
import com.dataiku.dip.security.impersonation.ImpersonationResolverService;
import com.dataiku.dip.server.SpringUtils;
import com.dataiku.dip.sql.HiveSQLDialect;
import com.dataiku.dip.sql.SQLDialect;
import com.dataiku.dip.sql.SQLUtils;
import com.dataiku.dip.sql.queries.ExpressionBuilder;
import com.dataiku.dip.sql.queries.ExpressionUtils;
import com.dataiku.dip.sql.queries.SelectQueryBuilder;
import com.dataiku.dip.util.DatasetLocUtils;
import com.dataiku.dip.utils.DKULogger;
import com.dataiku.dip.utils.ErrorContext;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.springframework.beans.factory.annotation.Autowired;

/**
 * Helpers for HiveQL / Impala query recipes.
 *
 * <p>Groups the logic visible in this class: resolving the Hive connection of a recipe from
 * its input datasets, deciding whether a recipe can run in "full-SQL" mode, building the
 * {@code INSERT ... TABLE ... PARTITION (...)} prefix for an output dataset, clearing the
 * target location before an overwrite by Impala, and rendering a dataset as a Hive
 * {@code SELECT} statement.
 *
 * <p>All methods are static except
 * {@link #prepareTargetDirectoryForOverwriteByImpala(AuthCtx, Dataset, Partition)}, which reads the
 * autowired {@link ImpersonationResolverService}.
 */
public class HiveQLQueryRecipeUtils {
    private static final DKULogger logger = DKULogger.getLogger("dku.recipes.hiveql");

    /** Error message shared by both {@code getMainConnection} overloads. */
    private static final String NO_CONNECTION_MESSAGE = "The recipe has no input dataset of type HDFS that could provide the connection to use";

    @Autowired
    private ImpersonationResolverService impersonationService;

    /** Creates the helper and asks Spring to populate the {@code @Autowired} fields of this instance. */
    public HiveQLQueryRecipeUtils() {
        SpringUtils.getInstance().autowire((Object)this);
    }

    /**
     * Loads the source datasets of a runnable subgraph.
     *
     * @param subgraph    subgraph whose source datasets are listed
     * @param datasetsDAO DAO handed to {@code FlowDataset.getMandatory}
     * @return the datasets, in the iteration order of {@code subgraph.getSourceDatasets()}
     * @throws IOException propagated from the dataset lookup
     */
    private static List<Dataset> getSourceDatasets(RecipeRunnableSubgraph subgraph, DatasetsDAO datasetsDAO) throws IOException {
        List<Dataset> sourceDatasets = new ArrayList<>();
        for (FlowDataset source : subgraph.getSourceDatasets()) {
            sourceDatasets.add(source.getMandatory(datasetsDAO));
        }
        return sourceDatasets;
    }

    /**
     * Loads the datasets referenced by the {@code "main"} input role of a serialized recipe.
     *
     * @param recipe      recipe whose {@code "main"} inputs are resolved against {@code recipe.projectKey}
     * @param datasetsDAO DAO used to fetch each serialized dataset
     * @return the datasets, in the iteration order of the recipe inputs
     * @throws IOException propagated from the dataset lookup
     */
    private static List<Dataset> getSourceDatasets(SerializedRecipe recipe, DatasetsDAO datasetsDAO) throws IOException {
        List<Dataset> sourceDatasets = new ArrayList<>();
        for (SerializedRecipe.RecipeInput source : recipe.getInputsForRole("main")) {
            DatasetLocUtils.DatasetLoc loc = DatasetLocUtils.resolveSmart(recipe.projectKey, source.ref);
            SerializedDataset sd = (SerializedDataset)datasetsDAO.getMandatory(loc);
            sourceDatasets.add(Dataset.fromSerialized(sd));
        }
        return sourceDatasets;
    }

    /**
     * Returns the single Hive connection shared by all source datasets of the subgraph.
     *
     * @return the shared connection, or {@code null} when there is no source dataset
     * @throws IOException propagated from dataset lookup; an exception built by
     *         {@code ErrorContext.iaef} is thrown when a dataset is not Hive-compatible or when
     *         two datasets resolve to different connections
     */
    public static String getSourceConnection(AuthCtx authCtx, RecipeRunnableSubgraph subgraph) throws IOException {
        DatasetsDAO datasetsDAO = (DatasetsDAO)SpringUtils.getBean(DatasetsDAO.class);
        return getSourceConnection(authCtx, getSourceDatasets(subgraph, datasetsDAO));
    }

    /**
     * Returns the single Hive connection shared by all {@code "main"} inputs of the recipe.
     *
     * @return the shared connection, or {@code null} when the recipe has no {@code "main"} input
     * @throws IOException propagated from dataset lookup; an exception built by
     *         {@code ErrorContext.iaef} is thrown when a dataset is not Hive-compatible or when
     *         two datasets resolve to different connections
     */
    public static String getSourceConnection(AuthCtx authCtx, SerializedRecipe recipe) throws IOException {
        DatasetsDAO datasetsDAO = (DatasetsDAO)SpringUtils.getBean(DatasetsDAO.class);
        return getSourceConnection(authCtx, getSourceDatasets(recipe, datasetsDAO));
    }

    /**
     * Checks that every dataset is Hive-compatible and that they all resolve to the same
     * Hive connection, then returns that connection.
     */
    private static String getSourceConnection(AuthCtx authCtx, List<Dataset> sourceDatasets) throws IOException {
        String sourceConnection = null;
        Dataset sourceConnectionDS = null;
        for (Dataset sourceDataset : sourceDatasets) {
            requireHiveCompatible(authCtx, sourceDataset);
            String sourceDSConnection = HiveSchemaHandler.getConnectionForHiveFromDataset(sourceDataset);
            if (sourceConnection == null) {
                // First connection seen so far: remember it and the dataset it came from.
                sourceConnection = sourceDSConnection;
                sourceConnectionDS = sourceDataset;
                continue;
            }
            if (sourceConnection.equals(sourceDSConnection)) {
                continue;
            }
            assert (sourceConnectionDS != null);
            throw ErrorContext.iaef("Mismatch in input connections : can't have a Hive/Impala recipe use both %s (for %s) and %s (for %s)", (Object)sourceConnection, (Object[])new Object[]{sourceConnectionDS.getFullName(), sourceDSConnection, sourceDataset.getFullName()});
        }
        return sourceConnection;
    }

    /**
     * Returns one of the Hive connections used by the source datasets of the subgraph.
     *
     * <p>Unlike {@code getSourceConnection}, several distinct connections are tolerated.
     *
     * @throws IOException propagated from dataset lookup; an exception built by
     *         {@code ErrorContext.iae} is thrown when no connection is found
     */
    public static String getMainConnection(AuthCtx authCtx, RecipeRunnableSubgraph subgraph) throws IOException {
        DatasetsDAO datasetsDAO = (DatasetsDAO)SpringUtils.getBean(DatasetsDAO.class);
        return pickMainConnection(getConnections(authCtx, getSourceDatasets(subgraph, datasetsDAO)));
    }

    /**
     * Returns one of the Hive connections used by the {@code "main"} inputs of the recipe.
     *
     * <p>Unlike {@code getSourceConnection}, several distinct connections are tolerated.
     *
     * @throws IOException propagated from dataset lookup; an exception built by
     *         {@code ErrorContext.iae} is thrown when no connection is found
     */
    public static String getMainConnection(AuthCtx authCtx, SerializedRecipe recipe) throws IOException {
        DatasetsDAO datasetsDAO = (DatasetsDAO)SpringUtils.getBean(DatasetsDAO.class);
        return pickMainConnection(getConnections(authCtx, getSourceDatasets(recipe, datasetsDAO)));
    }

    /** Returns the first element of {@code connections}, failing when the list is empty. */
    private static String pickMainConnection(List<String> connections) {
        if (connections.isEmpty()) {
            throw ErrorContext.iae(NO_CONNECTION_MESSAGE);
        }
        return connections.get(0);
    }

    /**
     * Collects the distinct Hive connections of the given datasets after checking that each
     * dataset is Hive-compatible.
     *
     * <p>NOTE(review): the result is in {@link HashSet} iteration order, not input order, so
     * when several connections are present the one seen first by callers is not tied to the
     * order of the recipe inputs.
     */
    private static List<String> getConnections(AuthCtx authCtx, List<Dataset> sourceDatasets) throws IOException {
        HashSet<String> sourceConnections = new HashSet<>();
        for (Dataset sourceDataset : sourceDatasets) {
            requireHiveCompatible(authCtx, sourceDataset);
            sourceConnections.add(HiveSchemaHandler.getConnectionForHiveFromDataset(sourceDataset));
        }
        return new ArrayList<String>(sourceConnections);
    }

    /** Throws when {@code HiveSchemaHandler.isCompatible} reports the dataset as not compatible. */
    private static void requireHiveCompatible(AuthCtx authCtx, Dataset dataset) throws IOException {
        HiveSchemaHandler.HiveCompatibilityStatus status = HiveSchemaHandler.isCompatible(authCtx, dataset);
        if (!status.compatible) {
            throw ErrorContext.iaef("Dataset %s cannot be used for Hive/Impala: %s", (Object)dataset.getFullName(), (Object[])new Object[]{status.reason});
        }
    }

    /**
     * Tells whether the recipe can run in full-SQL mode.
     *
     * @param authCtx          context used for the Hive compatibility check of the output
     * @param sourceConnection not used by this method
     * @param outputDataset    dataset the query writes to
     * @param sqlQuery         query text; {@code null} is treated like a query without SELECT
     * @return {@code null} when full-SQL mode is possible, otherwise the rejection reason
     */
    public static String runsInSQLMode(AuthCtx authCtx, String sourceConnection, Dataset outputDataset, String sqlQuery) {
        HiveSchemaHandler.HiveCompatibilityStatus status = HiveSchemaHandler.isCompatible(authCtx, outputDataset);
        String rejectionReason;
        if (!status.compatible) {
            rejectionReason = "output is not HDFS";
        } else if (sqlQuery == null || !sqlQuery.toUpperCase().contains("SELECT")) {
            rejectionReason = "no SELECT statement found";
        } else if (!checkDatasetFormat(outputDataset)) {
            rejectionReason = "Impala cannot write to the format and compression options of the output dataset.";
        } else {
            logger.info("The Hive/Impala query recipe can be run in full-SQL mode");
            return null;
        }
        logger.info("The Hive/Impala query recipe cannot run in full-SQL mode : " + rejectionReason);
        return rejectionReason;
    }

    /**
     * Checks the format and compression of the output dataset.
     *
     * <p>Datasets for which {@code DatasetInspector.isHive} is true are always accepted.
     * Otherwise only Parquet without LZO compression and CSV without compression are accepted.
     */
    private static boolean checkDatasetFormat(Dataset outputDataset) {
        if (DatasetInspector.isHive(outputDataset)) {
            return true;
        }
        if (outputDataset.getFormatType().equals(ParquetFormatMeta.META.getType())) {
            if (outputDataset.getFormatParamsAs(ParquetFormatConfig.class).parquetCompressionMethod == ParquetFormatConfig.CompressionMethod.LZO) {
                logger.info("Impala cannot write lzo-compressed parquet files.");
                return false;
            }
            return true;
        }
        if (outputDataset.getFormatType().equals(CSVFormatExtractor.META.getType())) {
            CSVFormatConfig csvConfig = outputDataset.getFormatParamsAs(CSVFormatConfig.class);
            if (!StringUtils.isEmpty(csvConfig.compress)) {
                logger.info("Impala cannot write compressed text files (dataset is using " + csvConfig.compress + ").");
                return false;
            }
            return true;
        }
        logger.info("Impala can only write to Parquet, Text  formats, not " + outputDataset.getFormatType() + ".");
        return false;
    }

    /**
     * Builds the {@code INSERT INTO|OVERWRITE TABLE `db`.`table` PARTITION (...)} prefix for a
     * concrete target partition.
     *
     * <p>NOTE(review): table, dimension names and partition values are concatenated into the
     * statement without escaping.
     *
     * @param targetDataset dataset whose resolved Hive table is written
     * @param targetPart    partition providing one value per dimension of the dataset; required
     *                      when the dataset is partitioned
     * @param writeMode     {@code APPEND} yields {@code INTO}, any other value {@code OVERWRITE}
     * @param prefixWithDb  whether to prefix the table with its schema when one is set
     * @return the statement prefix
     * @throws IOException              propagated from the Hive table resolution
     * @throws IllegalArgumentException if the dataset is partitioned and {@code targetPart} is
     *                                  {@code null} or has no value for one of the dimensions
     */
    public static String insertOverwriteFragment(Dataset targetDataset, Partition targetPart, Output.WriteMode writeMode, boolean prefixWithDb) throws IOException {
        String insertPart = insertTablePrefix(targetDataset, writeMode, prefixWithDb);
        if (!targetDataset.getPartitioningSchema().isPartitioned()) {
            return insertPart;
        }
        if (targetPart == null) {
            throw new IllegalArgumentException("No target partition given for partitioned dataset " + targetDataset.getFullName());
        }
        List<String> assignments = new ArrayList<>();
        for (String dimName : targetDataset.getPartitioningSchema().getDimensionNames()) {
            DimensionValue value = (DimensionValue)targetPart.getDimensionValues().get(dimName);
            if (value == null) {
                throw new IllegalArgumentException("Target partition " + targetPart + " has no value for dimension '" + dimName + "' of dataset " + targetDataset.getFullName());
            }
            assignments.add("`" + dimName + "`='" + value.id() + "'");
        }
        return insertPart + partitionClause(assignments);
    }

    /**
     * Builds the {@code INSERT INTO|OVERWRITE TABLE `db`.`table` PARTITION (...)} prefix where
     * each partition value is a {@code ${hiveconf:DKU_DST_<dimension>}} variable reference.
     *
     * @param targetDataset         dataset whose resolved Hive table is written
     * @param targetPartitionScheme scheme providing the dimension names; when {@code null} no
     *                              PARTITION clause is emitted
     * @param writeMode             {@code APPEND} yields {@code INTO}, any other value {@code OVERWRITE}
     * @param prefixWithDb          whether to prefix the table with its schema when one is set
     * @return the statement prefix
     * @throws IOException propagated from the Hive table resolution
     */
    public static String insertOverwriteFragment(Dataset targetDataset, PartitioningScheme targetPartitionScheme, Output.WriteMode writeMode, boolean prefixWithDb) throws IOException {
        String insertPart = insertTablePrefix(targetDataset, writeMode, prefixWithDb);
        if (!targetDataset.getPartitioningSchema().isPartitioned() || targetPartitionScheme == null) {
            return insertPart;
        }
        List<String> assignments = new ArrayList<>();
        for (String dimName : targetPartitionScheme.getDimensionNames()) {
            assignments.add("`" + dimName + "`= '${hiveconf:DKU_DST_" + dimName + "}'");
        }
        return insertPart + partitionClause(assignments);
    }

    /** Builds {@code "INSERT INTO|OVERWRITE TABLE [`schema`.]`table` "} (with the trailing space). */
    private static String insertTablePrefix(Dataset targetDataset, Output.WriteMode writeMode, boolean prefixWithDb) throws IOException {
        SQLUtils.SQLTable tableRef = HiveSchemaHandler.getResolvedHiveTableRefFromDataset(targetDataset);
        StringBuilder sql = new StringBuilder("INSERT ");
        sql.append(writeMode == Output.WriteMode.APPEND ? "INTO" : "OVERWRITE").append(" TABLE ");
        if (prefixWithDb && !StringUtils.isBlank((String)tableRef.getSchemaNullIfBlank())) {
            sql.append("`").append(tableRef.getSchemaNullIfBlank()).append("`.");
        }
        sql.append("`").append(tableRef.getTable()).append("` ");
        return sql.toString();
    }

    /**
     * Joins the assignments into {@code PARTITION (a,b,...)}.
     *
     * @return the clause, or an empty string when there is no assignment so that no malformed
     *         {@code PARTITION} clause is produced
     */
    private static String partitionClause(List<String> assignments) {
        if (assignments.isEmpty()) {
            return "";
        }
        StringBuilder clause = new StringBuilder("PARTITION (");
        for (int i = 0; i < assignments.size(); i++) {
            if (i > 0) {
                clause.append(",");
            }
            clause.append(assignments.get(i));
        }
        return clause.append(")").toString();
    }

    /**
     * Empties the location of the target partition before Impala overwrites it.
     *
     * <p>When the impersonation service is enabled, the partition is cleared through the
     * dataset handler. Otherwise the files directly under the partition folder are deleted
     * (sub-directories are not listed), or the path itself when it is not a directory; this
     * runs inside {@code handler.makeUGI().doAs(...)}.
     *
     * @param authCtx         context used to build the dataset handler
     * @param targetDataset   dataset about to be overwritten; the handler built for it is cast
     *                        to {@link HDFSDatasetHandler}
     * @param targetPartition partition about to be overwritten
     * @throws IOException if a path still exists after a failed delete
     * @throws Exception   propagated from the handler, the file system or {@code doAs}
     */
    public void prepareTargetDirectoryForOverwriteByImpala(AuthCtx authCtx, Dataset targetDataset, Partition targetPartition) throws Exception {
        try (final HDFSDatasetHandler handler = (HDFSDatasetHandler)DatasetHandlerFactory.build(authCtx, targetDataset)) {
            if (this.impersonationService.isEnabled()) {
                logger.infoV("Preparing for overwrite by Impala %s (p=%s): clear it", new Object[]{targetDataset.getFullName(), targetPartition});
                handler.clearPartitions(Lists.newArrayList(new Partition[]{targetPartition}));
            } else {
                logger.infoV("Preparing for overwrite by Impala %s (p=%s): deleting existing files", new Object[]{targetDataset.getFullName(), targetPartition});
                String finalLocation = HDFSPathUtils.concat(handler.getFullyQualifiedRootPath(), FilePartitioner.computePartitionRelPathAsFolder(targetPartition, targetDataset.getPartitioningSchema()));
                final Path finalLocationPath = new Path(finalLocation);
                handler.makeUGI().doAs(new PrivilegedExceptionAction<Void>(){

                    @Override
                    public Void run() throws IOException, InterruptedException {
                        FileSystem fs = handler.getImpersonatedFS();
                        if (!fs.exists(finalLocationPath)) {
                            return null;
                        }
                        if (fs.isDirectory(finalLocationPath)) {
                            // Non-recursive listing: only the files directly in the folder are removed.
                            RemoteIterator<LocatedFileStatus> files = fs.listFiles(finalLocationPath, false);
                            while (files.hasNext()) {
                                deleteOrFail(fs, files.next().getPath());
                            }
                        } else {
                            deleteOrFail(fs, finalLocationPath);
                        }
                        return null;
                    }
                });
            }
        }
    }

    /**
     * Deletes {@code path} recursively and fails if it is still present afterwards, so that
     * leftover files are not silently kept in a location that is about to be overwritten.
     */
    private static void deleteOrFail(FileSystem fs, Path path) throws IOException {
        // delete() may also return false because the path is already gone; only a surviving path is an error.
        if (!fs.delete(path, true) && fs.exists(path)) {
            throw new IOException("Failed to delete " + path + " while preparing target location for overwrite by Impala");
        }
    }

    /**
     * Tells whether {@link #getDatasetAsSelect} can render the dataset with the given sampling
     * and filter.
     *
     * @param dataset  dataset to read
     * @param schema   schema used to parse and translate the filter
     * @param sampling may be {@code null}; only {@code FULL} and {@code HEAD_SEQUENTIAL} are accepted
     * @param filter   may be blank; otherwise it must translate fully to Hive SQL
     * @return {@code true} when all conditions hold; the reason for a {@code false} is logged
     */
    public static boolean canGetDatasetAsSelect(Dataset dataset, Schema schema, SamplingParam sampling, String filter) {
        if (!DatasetInspector.isHDFSDatasetOrHiveTableDataset(dataset)) {
            logger.info("Not a hive-able dataset");
            return false;
        }
        if (sampling != null && sampling.samplingMethod != SamplingParam.SamplingMethod.FULL && sampling.samplingMethod != SamplingParam.SamplingMethod.HEAD_SEQUENTIAL) {
            logger.info("Sampling not amenable to sql translation");
            return false;
        }
        if (StringUtils.isBlank(filter)) {
            return true;
        }
        try {
            if (!translateFilter(filter, schema, new HiveSQLDialect()).isFullyTranslated) {
                logger.info("Filter not amenable to sql translation");
                return false;
            }
        }
        catch (Exception e) {
            // Any failure means "cannot translate"; keep the cause in the log for diagnosis.
            logger.info("Filter not amenable to sql translation (error): " + e);
            return false;
        }
        return true;
    }

    /**
     * Renders the dataset as a Hive {@code SELECT} over the columns of {@code schema}, with
     * optional partition restriction, row limit and filter.
     *
     * @param dataset    dataset to read; must satisfy {@code DatasetInspector.isHDFSDatasetOrHiveTableDataset}
     * @param schema     columns to select, also used to translate the filter
     * @param partitions when non-empty and the dataset is partitioned, restricts the query to them
     * @param sampling   may be {@code null}; {@code HEAD_SEQUENTIAL} adds a limit of
     *                   {@code sampling.maxRecords}, {@code FULL} adds nothing
     * @param filter     may be blank; otherwise translated and added as a WHERE condition
     * @return the SQL text produced for the Hive dialect
     * @throws CodedIOException         if the dataset cannot be queried through Hive
     * @throws IOException              propagated from table resolution or translation
     * @throws IllegalArgumentException if the sampling method is not supported or the filter
     *                                  cannot be fully translated (see {@link #canGetDatasetAsSelect})
     */
    public static String getDatasetAsSelect(Dataset dataset, Schema schema, List<Partition> partitions, SamplingParam sampling, String filter) throws IOException {
        if (!DatasetInspector.isHDFSDatasetOrHiveTableDataset(dataset)) {
            throw new CodedIOException((InfoMessage.MessageCode)RecipeCodes.ERR_RECIPE_INCONSISTENT_I_O, "Dataset '" + dataset.getFullName() + "' cannot be converted to a SELECT statement against Hive");
        }
        SQLUtils.SQLTable tableRef = HiveSchemaHandler.getResolvedHiveTableRefFromDataset(dataset);
        HiveSQLDialect dialect = new HiveSQLDialect();
        SelectQueryBuilder q = new SelectQueryBuilder();
        ExpressionBuilder.ExpressionBuilderFactory ef = new ExpressionBuilder.ExpressionBuilderFactory();
        for (SchemaColumn column : schema.columns) {
            q.select(ExpressionUtils.getAdjustedColumn(ef.col(column.getName()), column, dataset, (SQLDialect)dialect));
        }
        q.from(new SQLUtils.SQLTable(tableRef.getCatalog(), tableRef.getSchemaNullIfBlank(), tableRef.getTable(), true), tableRef.getTable());
        if (partitions != null && !partitions.isEmpty() && dataset.getPartitioningSchema() != null && dataset.getPartitioningSchema().isPartitioned()) {
            q.where(ExpressionUtils.getPartitionFilterClause(dataset.getPartitioningSchema(), dataset, partitions, (SQLDialect)dialect));
        }
        if (sampling != null) {
            switch (sampling.samplingMethod) {
                case FULL: {
                    break;
                }
                case HEAD_SEQUENTIAL: {
                    q.limit(sampling.maxRecords);
                    break;
                }
                default: {
                    throw new IllegalArgumentException("Sampling method " + sampling.samplingMethod + " cannot be translated to a Hive SELECT statement");
                }
            }
        }
        if (StringUtils.isNotBlank(filter)) {
            GrelTranslator.TranslationResult<ExpressionBuilder> res = translateFilter(filter, schema, dialect);
            if (!res.isFullyTranslated) {
                // Same condition canGetDatasetAsSelect rejects: never apply a partially translated filter silently.
                throw new IllegalArgumentException("Filter cannot be fully translated to Hive SQL: " + filter);
            }
            q.where((ExpressionBuilder)res.result);
        }
        return q.toSQL(dialect);
    }

    /** Parses {@code filter} against {@code schema} and translates it to a query expression for {@code dialect}. */
    private static GrelTranslator.TranslationResult<ExpressionBuilder> translateFilter(String filter, Schema schema, HiveSQLDialect dialect) throws IOException {
        Expression expression = new Expression(filter, schema);
        GrelToQueryTranslator translator = new GrelToQueryTranslator((GrelTranslator.GrelMapping)new GrelToQueryMapping(dialect), schema);
        return translator.translateToQuery(expression, true);
    }
}

