/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.hive;

import com.dataiku.dip.ApplicationConfigurator;
import com.dataiku.dip.cluster.HiveSettings;
import com.dataiku.dip.coremodel.Dataset;
import com.dataiku.dip.coremodel.SerializedRecipe;
import com.dataiku.dip.coremodel.SimpleKeyValue;
import com.dataiku.dip.dao.DatasetsDAO;
import com.dataiku.dip.dataflow.RecipeRunnableSubgraph;
import com.dataiku.dip.dataflow.RunnableSubgraph;
import com.dataiku.dip.dataflow.graph.FlowDataset;
import com.dataiku.dip.dataflow.utils.FlowVariables;
import com.dataiku.dip.datasets.DatasetInspector;
import com.dataiku.dip.hadoop.MapredCompressionSetter;
import com.dataiku.dip.hive.HiveLocalMetastoreTablesHandler;
import com.dataiku.dip.hive.HiveMetastoreTablesHandler;
import com.dataiku.dip.hive.HiveSchemaHandler;
import com.dataiku.dip.output.Output;
import com.dataiku.dip.partitioning.Partition;
import com.dataiku.dip.recipes.code.hive.HiveQLQueryRecipeUtils;
import com.dataiku.dip.security.AuthCtx;
import com.dataiku.dip.sql.HiveSQLDialect;
import com.dataiku.dip.sql.SQLUtils;
import com.dataiku.dip.sql.queries.Splitter;
import com.dataiku.dip.utils.DKUtils;
import com.dataiku.dip.utils.ErrorContext;
import com.dataiku.dip.variables.VariablesContext;
import com.dataiku.hproxy.utils.HiveUtils;
import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.text.StrLookup;
import org.apache.commons.lang.text.StrSubstitutor;
import org.apache.log4j.Logger;

/**
 * Prepares a user-written Hive script for execution as a recipe.
 *
 * <p>{@link #preprocess()} builds an "init query" (jar/UDF registration, configured pre/post
 * statements, optional compression commands and the table creation script supplied by the
 * {@link HiveMetastoreTablesHandler}) and rewrites the user query: variables are expanded,
 * dataset full names are replaced by the resolved Hive table names, and the {@code DKU_INSERT}
 * marker (or, when requested, the first leading {@code SELECT}) is turned into an insert into
 * the first output dataset.
 *
 * <p>Instances are stateful: results are read back through {@link #getInitQuery()},
 * {@link #getQuery()}, {@link #getTargetsByTableName()}, {@link #getIgnoredOutputTables()} and
 * {@link #queryMayBeUsingWith()} after {@link #preprocess()} has run.
 */
public class HiveScriptPreprocessor {
    private static final Logger logger = Logger.getLogger("dku.flow.hive");

    /** Statement prefix that is replaced by the insert fragment for the first output. */
    private static final String DKU_INSERT_MARKER = "DKU_INSERT";
    /** Optional prefix accepted in front of variable names in the user query. */
    private static final String HIVECONF_PREFIX = "hiveconf:";
    /** Capturing group matching one character that cannot be part of an identifier. */
    private static final String IDENTIFIER_BOUNDARY = "([^A-Za-z0-9_])";

    private static final Pattern AVRO_JAR_PATTERN = Pattern.compile("\\/avro-[0-9.]+\\.jar$");
    /** Splits a command into its leading whitespace (group 1) and the rest (group 2). */
    private static final Pattern LEADING_SPACES_PATTERN = Pattern.compile("^(\\s*)(.*)$", Pattern.DOTALL);
    private static final Pattern SET_ENGINE_TO_MR_PATTERN =
            Pattern.compile("^\\s*set\\s+hive\\.execution\\.engine\\s*=\\s*mr\\s*;?\\s*$", Pattern.CASE_INSENSITIVE);
    private static final Pattern ALLOWED_BEFORE_SET_ENGINE_PATTERN =
            Pattern.compile("^\\s*(add\\s+jar|create\\s+temporary\\s+function|use)\\s+.*$", Pattern.CASE_INSENSITIVE);

    /** Temporary functions registered from dataiku-hive-udf.jar, as {function name, class name}. */
    private static final String[][] DKU_UDFS = {
        {"DKU_Collect_To_Array", "com.dataiku.hive.udf.arrays.UDAFCollectToArray"},
        {"DKU_Array_Count_Distinct", "com.dataiku.hive.udf.arrays.UDFArrayCountDistinct"},
        {"DKU_Array_Count_Equals", "com.dataiku.hive.udf.arrays.UDFArrayCountEquals"},
        {"DKU_Array_Get", "com.dataiku.hive.udf.arrays.UDFArrayGet"},
        {"DKU_Array_Int_Sum", "com.dataiku.hive.udf.arrays.UDFArrayIntSum"},
        {"DKU_Array_Join", "com.dataiku.hive.udf.arrays.UDFArrayJoin"},
        {"DKU_Array_Sub_Sequences", "com.dataiku.hive.udf.arrays.UDFArraySubSequences"},
        {"DKU_Count_Distinct_To_Map", "com.dataiku.hive.udf.maps.UDAFCountDistinctToMap"},
        {"DKU_Map_Group_Sum", "com.dataiku.hive.udf.maps.UDAFMapGroupSum"},
        {"DKU_Count_To_Map", "com.dataiku.hive.udf.maps.UDFCountToMap"},
        {"DKU_Map_Value_Filter_Lower_Than", "com.dataiku.hive.udf.maps.UDFMapValueFilterLowerThan"},
        {"DKU_Map_Value_Filter_Top_N", "com.dataiku.hive.udf.maps.UDFMapValueFilterTopN"},
        {"DKU_Exponential_Smoothing_Moving_Average", "com.dataiku.hive.udf.maths.UDFExponentialSmoothingMovingAverage"},
        {"DKU_String_Sub_Sequences", "com.dataiku.hive.udf.strings.UDFStringSubSequences"},
        {"DKU_Rank", "com.dataiku.hive.udf.window.Rank"},
        {"DKU_Last_Of_Group_According_To", "com.dataiku.hive.udf.window.UDAFLastOfGroupAccordingTo"},
        {"DKU_First_Of_Group_According_To", "com.dataiku.hive.udf.window.UDAFFirstOfGroupAccordingTo"},
    };

    private final DatasetsDAO datasetsDAO;
    private final RunnableSubgraph subgraph;
    private final VariablesContext context;
    private final HiveMetastoreTablesHandler tablesHandler;
    private final boolean addDkuUdf;
    private final AuthCtx authCtx;
    private final HiveSettings hiveSettings;

    private boolean validationMode = false;
    private boolean forceInsertOverwrite;
    private boolean suppressOutputTablesCreation;

    private final StringBuilder initQuery = new StringBuilder();
    private String hiveQuery;
    /** Variables expanded in the user query; filled by {@link #preprocess()}. */
    private final Map<String, String> additionalVariables = new HashMap<String, String>();
    /** Dataset full name -> Hive table name, for every input and (non-suppressed) output. */
    private final Map<String, String> tableNameSubsts = new HashMap<String, String>();
    private final Map<String, FlowDataset> targetsByTableName = new HashMap<String, FlowDataset>();
    private final List<String> ignoredOutputTables = new ArrayList<String>();
    private boolean queryMayBeUsingWith;

    /**
     * @param authCtx       authentication context forwarded to the schema/variable helpers
     * @param dao           DAO used to resolve flow datasets
     * @param context       variables context whose variables are made available to the query
     * @param subgraph      subgraph being run; {@link #preprocess()} casts it to
     *                      {@link RecipeRunnableSubgraph}
     * @param hiveQuery     the user-written script
     * @param tablesHandler receives every input/output and provides the table creation script
     * @param addDkuUdf     whether the Dataiku UDF jar and functions are registered
     * @param hiveSettings  Hive settings (auxiliary jars, init statements, compression flag)
     */
    public HiveScriptPreprocessor(AuthCtx authCtx, DatasetsDAO dao, VariablesContext context, RunnableSubgraph subgraph, String hiveQuery, HiveMetastoreTablesHandler tablesHandler, boolean addDkuUdf, HiveSettings hiveSettings) {
        this.authCtx = authCtx;
        this.datasetsDAO = dao;
        this.context = context;
        this.subgraph = subgraph;
        this.hiveQuery = hiveQuery;
        this.tablesHandler = tablesHandler;
        this.addDkuUdf = addDkuUdf;
        this.hiveSettings = hiveSettings;
    }

    public AuthCtx getAuthCtx() {
        return this.authCtx;
    }

    /** In validation mode, outputs without schema are recorded as ignored instead of failing. */
    public void setValidationMode(boolean enabled) {
        this.validationMode = enabled;
    }

    /** When enabled, the first statement starting with SELECT is prefixed with the insert fragment. */
    public void setForceInsertOverwrite(boolean forceInsertOverwrite) {
        this.forceInsertOverwrite = forceInsertOverwrite;
    }

    /** When enabled, {@link #preprocess()} skips all per-output processing. */
    public void setSuppressOutputTablesCreation(boolean suppressOutputTablesCreation) {
        this.suppressOutputTablesCreation = suppressOutputTablesCreation;
    }

    /** @return the init query accumulated so far (empty before {@link #preprocess()}). */
    public String getInitQuery() {
        return this.initQuery.toString();
    }

    /** @return the user query; rewritten once {@link #preprocess()} has completed. */
    public String getQuery() {
        return this.hiveQuery;
    }

    /**
     * @return true if some statement started with WITH and no insert fragment was injected
     *         (neither through DKU_INSERT nor through the implicit SELECT rewrite).
     */
    public boolean queryMayBeUsingWith() {
        return this.queryMayBeUsingWith;
    }

    /**
     * Builds the statements that register jars and functions before the user query runs.
     *
     * <p>In order: the Dataiku UDF jar and its temporary functions (only if {@code addDkuUdf},
     * the jar exists under the install folder and its path has no space), the configured
     * {@code auxJarsAndUdfs}, and the first avro jar found in the {@code DKU_HIVE_CP}
     * environment variable, if any.
     *
     * @param addDkuUdf    whether to register the Dataiku UDFs
     * @param hiveSettings settings providing {@code auxJarsAndUdfs}
     * @return the script, possibly empty
     */
    public static String getHiveJarScript(boolean addDkuUdf, HiveSettings hiveSettings) {
        StringBuilder sb = new StringBuilder();
        if (addDkuUdf) {
            String dkuJar = ApplicationConfigurator.getInstallFolder() + "/lib/third/dataiku-hive-udf.jar";
            if (!new File(dkuJar).exists()) {
                logger.warn("Unable to find dataiku-hive-udf.jar");
            } else if (dkuJar.contains(" ")) {
                logger.warn("Path to dataiku-hive-udf.jar cannot contain spaces'");
            } else {
                sb.append("ADD JAR ").append(escapeSemicolons(dkuJar)).append(";\n");
                for (String[] udf : DKU_UDFS) {
                    sb.append("CREATE TEMPORARY FUNCTION ").append(udf[0])
                      .append(" AS '").append(udf[1]).append("';\n");
                }
            }
        }
        appendStatementsIfNotBlank(sb, hiveSettings.auxJarsAndUdfs);

        String hiveCP = System.getenv("DKU_HIVE_CP");
        if (hiveCP != null) {
            for (String jarPath : DKUtils.parseClassPath(hiveCP)) {
                if (AVRO_JAR_PATTERN.matcher(jarPath).find()) {
                    sb.append("ADD JAR ").append(escapeSemicolons(jarPath)).append(";\n");
                    // Only the first avro jar of the classpath is added.
                    break;
                }
            }
        }
        return sb.toString();
    }

    /** @return tables of outputs skipped in validation mode because they have no schema (live list). */
    public List<String> getIgnoredOutputTables() {
        return this.ignoredOutputTables;
    }

    /** @return processed outputs indexed by Hive table name (live map). */
    public Map<String, FlowDataset> getTargetsByTableName() {
        return this.targetsByTableName;
    }

    /**
     * Builds the init query and rewrites the user query.
     *
     * <p>On success, {@link #getInitQuery()} and {@link #getQuery()} return the results. If an
     * exception is thrown the user query is left untouched, but the init query and the handler
     * may already have been partially updated.
     *
     * @throws Exception an illegal-argument error built by {@link ErrorContext} when the subgraph
     *         has no input or no output, a dataset is not Hive-compatible, an input has no
     *         schema, an output has no schema outside validation mode, the insert marker is
     *         used more than once, an implicit insert is needed with several outputs, or the
     *         query contains no SELECT; or any exception raised by the collaborators
     */
    public void preprocess() throws Exception {
        if (this.subgraph.getTargets().size() == 0) {
            throw ErrorContext.iae("At least one output dataset is required");
        }
        if (this.subgraph.getSources().size() == 0) {
            throw ErrorContext.iae("At least one input dataset is required");
        }

        this.initQuery.append(HiveScriptPreprocessor.getHiveJarScript(this.addDkuUdf, this.hiveSettings));
        appendStatementsIfNotBlank(this.initQuery, this.hiveSettings.initQueryPreStatements);

        FlowVariables.addPartitioningVariables(this.authCtx, this.additionalVariables, this.subgraph, this.datasetsDAO);
        HiveSQLDialect dialect = new HiveSQLDialect();
        FlowVariables.addWhereClauseVariables(this.additionalVariables, this.subgraph, this.datasetsDAO, dialect);
        boolean isForValidation = this.tablesHandler instanceof HiveLocalMetastoreTablesHandler;
        SerializedRecipe recipe = ((RecipeRunnableSubgraph) this.subgraph).getRecipe().getModel();
        this.additionalVariables.putAll(FlowVariables.getSQLTableVariables(this.authCtx, recipe, isForValidation));
        // Added last, so context variables override the computed ones on name clashes.
        this.additionalVariables.putAll(this.context.getAllVariables());

        MapredCompressionSetter compressionSetter = new MapredCompressionSetter();
        preprocessTargets(compressionSetter);
        preprocessSources();

        if (this.hiveSettings.addCompressionCommands) {
            logger.info("Add commands to specify compression on outputs");
            this.initQuery.append(compressionSetter.getCompressionCommands(false));
        } else {
            logger.info("No compression command added");
        }
        this.initQuery.append(this.tablesHandler.getCreationScript());
        this.initQuery.append("\n");
        appendStatementsIfNotBlank(this.initQuery, this.hiveSettings.initQueryPostStatements);

        rewriteCommands(expandVariables(dialect));
    }

    /** Registers every output with the tables handler and records its table name. */
    private void preprocessTargets(MapredCompressionSetter compressionSetter) throws Exception {
        if (this.suppressOutputTablesCreation) {
            return;
        }
        for (FlowDataset flowDataset : this.subgraph.getTargetsDatasets()) {
            logger.info("Preprocessing for output " + flowDataset.getFullName());
            Dataset dataset = flowDataset.getMandatory(this.datasetsDAO);
            HiveSchemaHandler.HiveCompatibilityStatus status = HiveSchemaHandler.isCompatible(this.authCtx, dataset);
            if (!status.compatible) {
                throw ErrorContext.iaef("Dataset %s cannot be used for Hive: %s", dataset.getFullName(), new Object[]{status.reason});
            }
            String tableName = HiveSchemaHandler.getResolvedHiveTableRefFromDataset(dataset).getTable();
            this.targetsByTableName.put(tableName, flowDataset);
            this.tableNameSubsts.put(flowDataset.getFullName(), tableName);
            Partition targetPart = this.subgraph.getTargetPartitions().get(flowDataset.getFullName());
            this.tablesHandler.preProcessTarget(flowDataset, dataset, tableName, targetPart);
            compressionSetter.inspectOutputDataset(dataset);

            // A missing schema is handled like an empty one, as is done for inputs.
            boolean hasColumns = dataset.getSchema() != null && !dataset.getSchema().getColumns().isEmpty();
            if (hasColumns) {
                continue;
            }
            if (!this.validationMode) {
                throw ErrorContext.iaef("The schema of the output dataset '%s' is empty", flowDataset.getFullName(), new Object[0]);
            }
            logger.info("Output " + flowDataset.getFullName() + " has no schema");
            this.ignoredOutputTables.add(tableName);
        }
    }

    /** Registers every input with the tables handler and records its table name. */
    private void preprocessSources() throws Exception {
        for (FlowDataset flowDataset : this.subgraph.getSourceDatasets()) {
            logger.info("Preprocessing for input " + flowDataset.getFullName());
            Dataset dataset = flowDataset.getMandatory(this.datasetsDAO);
            HiveSchemaHandler.HiveCompatibilityStatus status = HiveSchemaHandler.isCompatible(this.authCtx, dataset);
            if (!status.compatible) {
                throw ErrorContext.iaef("Dataset %s cannot be used for Hive: %s", flowDataset.getFullName(), new Object[]{status.reason});
            }
            if (dataset.getSchema() == null || dataset.getSchema().getColumns().size() == 0) {
                throw ErrorContext.iaef("Input dataset %s has no schema. ", dataset.getFullName(), new Object[0]);
            }
            // Datasets flagged as Hive queries are referenced by dataset name, others by resolved table.
            String tableName = DatasetInspector.isHiveQuery(dataset)
                    ? dataset.getName()
                    : HiveSchemaHandler.getResolvedHiveTableRefFromDataset(dataset).getTable();
            this.tableNameSubsts.put(flowDataset.getFullName(), tableName);
            this.tablesHandler.preProcessSource(flowDataset, dataset, tableName, this.subgraph.getSourcePartitions(flowDataset));
        }
    }

    /**
     * Expands variables in the user query, then re-joins its statements with ';' after escaping
     * as "\;" every semicolon that the dialect-aware splitter did not treat as a separator.
     */
    private String expandVariables(HiveSQLDialect dialect) {
        StrSubstitutor substitutor = new StrSubstitutor(new StrLookup() {
            @Override
            public String lookup(String name) {
                // "${hiveconf:x}" and "${x}" resolve to the same variable.
                String key = name.startsWith(HIVECONF_PREFIX) ? name.substring(HIVECONF_PREFIX.length()) : name;
                return HiveScriptPreprocessor.this.additionalVariables.get(key);
            }
        });
        String expanded = substitutor.replace(this.hiveQuery);

        Splitter splitter = new Splitter(dialect.getSemicolonExclusionPortionFinders());
        List<String> escapedStatements = new ArrayList<String>();
        for (String statement : splitter.splitNoTrim(expanded)) {
            escapedStatements.add(escapeSemicolons(statement));
        }
        return Joiner.on(";").join(escapedStatements);
    }

    /**
     * Rewrites each command of the script (table names, insert fragment) and stores the result
     * in {@link #hiveQuery} and {@link #queryMayBeUsingWith}. Nothing is stored on failure.
     */
    private void rewriteCommands(String script) throws Exception {
        // Compiled once for all commands. Names are quoted so that the '.' of a dataset full
        // name, or a '$'/'\' in a table name, is taken literally.
        List<Pattern> tablePatterns = new ArrayList<Pattern>();
        List<String> tableReplacements = new ArrayList<String>();
        for (Map.Entry<String, String> subst : this.tableNameSubsts.entrySet()) {
            tablePatterns.add(Pattern.compile(IDENTIFIER_BOUNDARY + Pattern.quote(subst.getKey()) + IDENTIFIER_BOUNDARY));
            tableReplacements.add("$1" + Matcher.quoteReplacement(subst.getValue()) + "$2");
        }

        StringBuilder processedScript = new StringBuilder();
        boolean insertInjected = false;
        boolean foundAtLeastOneSelect = false;
        boolean foundAtLeastOneWith = false;
        for (Object rawCommand : HiveUtils.splitCommandsPreserveEscaping(script)) {
            Matcher spaceMatcher = LEADING_SPACES_PATTERN.matcher((CharSequence) rawCommand);
            if (!spaceMatcher.matches()) {
                continue;
            }
            String leadingSpaces = spaceMatcher.group(1);
            String command = spaceMatcher.group(2);
            foundAtLeastOneSelect |= StringUtils.containsIgnoreCase(command, "SELECT");
            foundAtLeastOneWith |= StringUtils.startsWithIgnoreCase(command, "WITH");

            for (int i = 0; i < tablePatterns.size(); i++) {
                command = tablePatterns.get(i).matcher(command).replaceAll(tableReplacements.get(i));
            }

            if (command.startsWith(DKU_INSERT_MARKER)) {
                if (insertInjected) {
                    throw ErrorContext.iae("Implicit insertion (or DKU_INSERT) can be used only once");
                }
                // Plain concatenation: the fragment must not be interpreted as a regex replacement.
                command = this.insertOverwriteFragment(Output.WriteMode.OVERWRITE)
                        + command.substring(DKU_INSERT_MARKER.length());
                insertInjected = true;
            } else if (this.forceInsertOverwrite && !insertInjected && StringUtils.startsWithIgnoreCase(command, "SELECT")) {
                if (this.subgraph.getTargets().size() > 1) {
                    throw ErrorContext.iae("Query has multiple targets but no insert statement");
                }
                command = this.insertOverwriteFragment(Output.WriteMode.OVERWRITE) + " " + command;
                insertInjected = true;
            }
            processedScript.append(leadingSpaces).append(command).append(";");
        }
        if (!foundAtLeastOneSelect) {
            throw ErrorContext.iae("Query has no SELECT statement");
        }
        this.queryMayBeUsingWith = foundAtLeastOneWith && !insertInjected;
        this.hiveQuery = processedScript.toString();
    }

    /**
     * Builds the insert fragment for the first output dataset of the subgraph.
     *
     * @param writeMode write mode forwarded to {@link HiveQLQueryRecipeUtils}
     * @return the fragment to put in front of a SELECT
     * @throws IOException if the fragment cannot be built
     */
    protected String insertOverwriteFragment(Output.WriteMode writeMode) throws IOException {
        FlowDataset target = this.subgraph.getTargetsDatasets().get(0);
        Dataset targetDataset = target.getMandatory(this.datasetsDAO);
        Partition targetPartition = this.subgraph.getTargetPartitions().get(targetDataset.getFullName());
        return HiveQLQueryRecipeUtils.insertOverwriteFragment(targetDataset, targetPartition, writeMode, this.tablesHandler.shouldPrefixWithDbInInsert());
    }

    /**
     * Tells whether the script starts by forcing the execution engine to MapReduce.
     *
     * @param statements the statements of the script, in order
     * @return true if a {@code set hive.execution.engine=mr} statement (case-insensitive) is
     *         reached after only {@code add jar}, {@code create temporary function} or
     *         {@code use} statements; false otherwise, including for an empty array
     */
    public static boolean checkIfEngineForcedToMR(String[] statements) {
        for (String statement : statements) {
            if (SET_ENGINE_TO_MR_PATTERN.matcher(statement).matches()) {
                return true;
            }
            if (!ALLOWED_BEFORE_SET_ENGINE_PATTERN.matcher(statement).matches()) {
                // Any other statement ends the prologue in which the engine may be forced.
                return false;
            }
        }
        return false;
    }

    /**
     * Filters extra configuration entries before they are handed to HiveServer2.
     *
     * <p>The blacklist and whitelist regexes default to the application parameters
     * {@code dku.hiveserver2.properties.blacklist} (default {@code ^(fs|yarn)\..*$}) and
     * {@code dku.hiveserver2.properties.whitelist} (default empty), and are overridden by
     * entries of {@code fsExtraConf} carrying those keys. A blank regex disables that filter.
     *
     * @param fsExtraConf the entries to filter; not modified
     * @return a new list with, in the original order, the entries whose key fully matches the
     *         whitelist (if any), does not fully match the blacklist (if any), and is not one
     *         of the two control keys
     * @throws java.util.regex.PatternSyntaxException if a configured regex is invalid
     */
    public static List<SimpleKeyValue> getFSExtraConfForHiveserver2(List<SimpleKeyValue> fsExtraConf) {
        String blacklistPropName = "dku.hiveserver2.properties.blacklist";
        String whitelistPropName = "dku.hiveserver2.properties.whitelist";
        String blacklist = ApplicationConfigurator.getParams().getParam(blacklistPropName, "^(fs|yarn)\\..*$");
        String whitelist = ApplicationConfigurator.getParams().getParam(whitelistPropName, "");
        for (SimpleKeyValue skv : fsExtraConf) {
            if (blacklistPropName.equals(skv.key)) {
                blacklist = skv.value;
            }
            if (whitelistPropName.equals(skv.key)) {
                whitelist = skv.value;
            }
        }
        Pattern blacklistPattern = StringUtils.isNotBlank(blacklist) ? Pattern.compile(blacklist) : null;
        Pattern whitelistPattern = StringUtils.isNotBlank(whitelist) ? Pattern.compile(whitelist) : null;
        // The control keys themselves are never forwarded.
        HashSet<String> controlKeys = Sets.newHashSet(blacklistPropName, whitelistPropName);

        List<SimpleKeyValue> clean = new ArrayList<SimpleKeyValue>();
        for (SimpleKeyValue skv : fsExtraConf) {
            boolean keep = !controlKeys.contains(skv.key);
            if (whitelistPattern != null && !whitelistPattern.matcher(skv.key).matches()) {
                keep = false;
            }
            if (blacklistPattern != null && blacklistPattern.matcher(skv.key).matches()) {
                keep = false;
            }
            if (keep) {
                clean.add(skv);
            }
        }
        return clean;
    }

    /** Appends {@code statements} surrounded by a newline and a "\n;\n" terminator, unless blank. */
    private static void appendStatementsIfNotBlank(StringBuilder target, String statements) {
        if (StringUtils.isNotBlank(statements)) {
            target.append("\n");
            target.append(statements);
            target.append("\n;\n");
        }
    }

    /** Escapes every ';' as "\;" so that it is not taken for a statement separator. */
    private static String escapeSemicolons(String text) {
        return text.replace(";", "\\;");
    }
}

