/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.spark;

import com.dataiku.dip.SparkSQLLikeJob;
import com.dataiku.dip.coremodel.SchemaColumn;
import com.dataiku.dip.dataflow.exec.split.SplitRecipeJobDef;
import com.dataiku.dip.dataflow.exec.split.SplitRecipePayloadParams;
import com.dataiku.dip.output.Output;
import com.dataiku.dip.spark.FlowDatasetRef;
import com.dataiku.dip.spark.GlobalMetastoreHiveConfig$;
import com.dataiku.dip.spark.InputDatasetsReadParams;
import com.dataiku.dip.spark.LocalMetastoreHiveConfig$;
import com.dataiku.dip.spark.StdDataikuSparkContext;
import com.dataiku.dip.spark.package$;
import com.dataiku.dss.spark.DataikuSparkContext;
import java.io.File;
import java.io.Serializable;
import org.apache.log4j.Logger;
import org.apache.spark.SparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.sql.types.StructType$;
import scala.Function1;
import scala.MatchError;
import scala.None$;
import scala.Option;
import scala.Predef$;
import scala.Some;
import scala.Tuple2;
import scala.Tuple3;
import scala.collection.GenIterable;
import scala.collection.IterableLike;
import scala.collection.JavaConverters$;
import scala.collection.Seq;
import scala.collection.TraversableLike;
import scala.collection.TraversableOnce;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;
import scala.collection.mutable.ArrayOps;
import scala.collection.mutable.Buffer;
import scala.collection.mutable.Buffer$;
import scala.math.Numeric;
import scala.reflect.ClassTag$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;

/**
 * Spark job that executes a split recipe (decompiled form of a Scala singleton object).
 *
 * <p>{@link #run(String)} loads a {@code SplitRecipeJobDef}, sets up the Spark/SQL contexts,
 * registers the result of the descriptor's preparation query as a temp table and then writes
 * every output either from a random split of that data or from a per-output
 * {@code SELECT ... WHERE} query. Outputs listed in {@code emptyOutputs} are written from an
 * empty DataFrame carrying their schema.
 *
 * <p>The {@code $anonfun$run$N} methods are the compiler-generated lambda bodies; they stay
 * public and keep their signatures because the lambdas in {@link #run(String)} call them.
 */
public final class SparkSplitRecipeJob$
implements SparkSQLLikeJob {
    /** Singleton instance; assigned by the private constructor, which the static initializer runs. */
    public static SparkSplitRecipeJob$ MODULE$;
    /** Name of the temp table under which the prepared input is registered and later queried. */
    private static final String ENRICHED_INPUT_TEMP_TABLE_NAME = "__dku_enriched_input";
    private final Logger logger;
    /** Lazily computed run directory; only meaningful once {@link #bitmap$0} is true. */
    private File runDir;
    /** Set to true once {@link #runDir} has been computed. */
    private volatile boolean bitmap$0;

    static {
        new SparkSplitRecipeJob$();
    }

    /** Delegates to the default implementation provided by {@code SparkSQLLikeJob}. */
    @Override
    public void main(String[] args) {
        SparkSQLLikeJob.main$(this, args);
    }

    /** Delegates to the default implementation provided by {@code SparkSQLLikeJob}. */
    @Override
    public Dataset<Row> openInputDataset(SparkContext sc, SQLContext ssc, StdDataikuSparkContext dsc, FlowDatasetRef input, InputDatasetsReadParams allReadParams) {
        return SparkSQLLikeJob.openInputDataset$(this, sc, ssc, dsc, input, allReadParams);
    }

    /** Delegates to the default implementation provided by {@code SparkSQLLikeJob}. */
    @Override
    public void saveOutputDataset(DataikuSparkContext dsc, Dataset<Row> df, String fullName, String partition, boolean writeSchema, Output.WriteMode writeMode) {
        SparkSQLLikeJob.saveOutputDataset$(this, dsc, df, fullName, partition, writeSchema, writeMode);
    }

    /**
     * Slow path of {@link #runDir()}: computes the run directory under the instance lock
     * unless another caller has already done so.
     */
    private File runDir$lzycompute() {
        synchronized (this) {
            if (!this.bitmap$0) {
                this.runDir = SparkSQLLikeJob.runDir$(this);
                this.bitmap$0 = true;
            }
        }
        return this.runDir;
    }

    /** Returns the run directory, computing it on first access. */
    @Override
    public File runDir() {
        if (!this.bitmap$0) {
            return this.runDir$lzycompute();
        }
        return this.runDir;
    }

    /**
     * Runs the split recipe described by the job descriptor.
     *
     * @param descFilePath location of the serialized {@code SplitRecipeJobDef}, resolved by
     *                     {@code getFromResourceOrFile}
     * @return the Spark context created for the job
     * @throws MatchError if the context setup returns {@code null}
     */
    @Override
    public SparkContext run(String descFilePath) {
        this.logger().info((Object)"Reading job descriptor");
        SplitRecipeJobDef desc = (SplitRecipeJobDef)com.dataiku.dip.remoterun.package$.MODULE$.getFromResourceOrFile(descFilePath, ClassTag$.MODULE$.apply(SplitRecipeJobDef.class));
        this.logger().info((Object)"Creating Spark context");
        // Global metastore: pass the configured database; local metastore: rooted at the run dir, no database.
        Tuple3<SparkContext, SQLContext, Object> contexts = desc.useGlobalMetastore
            ? package$.MODULE$.setupSQLContext(GlobalMetastoreHiveConfig$.MODULE$.apply("SparkSQLRecipeJob"), (Option<String>)new Some((Object)desc.metastoreDatabase), package$.MODULE$.setupSQLContext$default$3())
            : package$.MODULE$.setupSQLContext(LocalMetastoreHiveConfig$.MODULE$.apply(this.runDir(), "SparkSQLRecipeJob"), (Option<String>)None$.MODULE$, package$.MODULE$.setupSQLContext$default$3());
        if (contexts == null) {
            throw new MatchError(contexts);
        }
        SparkContext sc = (SparkContext)contexts._1();
        SQLContext ssc = (SQLContext)contexts._2();
        StdDataikuSparkContext dsc = new StdDataikuSparkContext();
        // The returned Dataset is intentionally unused; the call is kept for its side effects
        // (presumably it makes the input visible to prepareSqlQuery - confirm in SparkSQLLikeJob).
        this.openInputDataset(sc, ssc, dsc, desc.input, desc.readParams);
        this.logger().info((Object)("Registering tempTable '" + ENRICHED_INPUT_TEMP_TABLE_NAME + "' \n " + desc.prepareSqlQuery));
        // Persisted because the prepared input is read once per output below.
        Dataset input = ssc.sql(desc.prepareSqlQuery).persist();
        com.dataiku.dip.utils.package$.MODULE$.registerTempTableProperly((Dataset<Row>)input, ENRICHED_INPUT_TEMP_TABLE_NAME);
        SplitRecipePayloadParams.Mode mode = desc.splitMode;
        if (SplitRecipePayloadParams.Mode.RANDOM.equals(mode)) {
            Buffer outputs = (Buffer)JavaConverters$.MODULE$.asScalaBufferConverter(desc.outputs).asScala();
            // The schema of the first output decides which input columns are kept for every split.
            Buffer outputSchema = (Buffer)JavaConverters$.MODULE$.asScalaBufferConverter(((SplitRecipeJobDef.SplitRecipeOutputDef)outputs.head()).outputSchema).asScala();
            List keptColumnNames = (List)new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])input.columns())).toList().filter((Function1 & Serializable & scala.Serializable)col -> BoxesRunTime.boxToBoolean((boolean)outputSchema.exists((Function1 & Serializable & scala.Serializable)x$2 -> BoxesRunTime.boxToBoolean((boolean)SparkSplitRecipeJob$.$anonfun$run$2(col, x$2)))));
            Seq keptColumns = (Seq)keptColumnNames.map((Function1 & Serializable & scala.Serializable)x$3 -> input.apply(x$3), List$.MODULE$.canBuildFrom());
            Dataset inputWithOutputSchema = input.select(keptColumns);
            Buffer weights = (Buffer)outputs.map((Function1 & Serializable & scala.Serializable)x$4 -> BoxesRunTime.boxToDouble((double)Predef$.MODULE$.Float2float(x$4.share)), Buffer$.MODULE$.canBuildFrom());
            double totalWeight = BoxesRunTime.unboxToDouble((Object)weights.sum((Numeric)Numeric.DoubleIsFractional$.MODULE$));
            if (totalWeight < 100.0) {
                // Add a remainder weight so the shares total 100; the split it produces has no
                // matching output and is dropped by the zip below.
                weights.append((Seq)Predef$.MODULE$.wrapDoubleArray(new double[]{100.0 - totalWeight}));
            }
            Dataset[] randomOutputs = inputWithOutputSchema.randomSplit((double[])weights.toArray(ClassTag$.MODULE$.Double()), desc.randomSeed);
            // Pair each output with its split positionally and save it.
            ((TraversableLike)outputs.zip((GenIterable)Predef$.MODULE$.wrapRefArray((Object[])randomOutputs), Buffer$.MODULE$.canBuildFrom())).withFilter((Function1 & Serializable & scala.Serializable)check$ifrefutable$1 -> BoxesRunTime.boxToBoolean((boolean)SparkSplitRecipeJob$.$anonfun$run$5(check$ifrefutable$1))).foreach((Function1 & Serializable & scala.Serializable)x$5 -> {
                SparkSplitRecipeJob$.$anonfun$run$6(dsc, desc, x$5);
                return BoxedUnit.UNIT;
            });
        } else {
            // Every other mode: one filtered SELECT over the temp table per output.
            ((IterableLike)JavaConverters$.MODULE$.asScalaBufferConverter(desc.outputs).asScala()).foreach((Function1 & Serializable & scala.Serializable)output -> {
                SparkSplitRecipeJob$.$anonfun$run$7(ENRICHED_INPUT_TEMP_TABLE_NAME, ssc, dsc, desc, output);
                return BoxedUnit.UNIT;
            });
        }
        ((IterableLike)JavaConverters$.MODULE$.asScalaBufferConverter(desc.emptyOutputs).asScala()).foreach((Function1 & Serializable & scala.Serializable)emptyOutput -> {
            SparkSplitRecipeJob$.$anonfun$run$9(ssc, sc, dsc, desc, emptyOutput);
            return BoxedUnit.UNIT;
        });
        return sc;
    }

    /** Returns the logger named {@code dku.spark.recipe.split}. */
    @Override
    public Logger logger() {
        return this.logger;
    }

    /**
     * Lambda body: tells whether a schema column is named like the given input column.
     * The comparison is null-safe; two {@code null} names are considered equal.
     */
    public static final /* synthetic */ boolean $anonfun$run$2(String col$1, SchemaColumn x$2) {
        String schemaName = x$2.getName();
        return schemaName == null ? col$1 == null : schemaName.equals(col$1);
    }

    /** Lambda body: pattern-match guard that accepts every non-null (output, split) pair. */
    public static final /* synthetic */ boolean $anonfun$run$5(Tuple2 check$ifrefutable$1) {
        return check$ifrefutable$1 != null;
    }

    /**
     * Lambda body: saves one random split into its paired output.
     *
     * @param x$5 pair of (output definition, split Dataset)
     * @throws MatchError if the pair is {@code null}
     */
    public static final /* synthetic */ void $anonfun$run$6(StdDataikuSparkContext dsc$1, SplitRecipeJobDef desc$1, Tuple2 x$5) {
        if (x$5 == null) {
            throw new MatchError((Object)x$5);
        }
        SplitRecipeJobDef.SplitRecipeOutputDef output = (SplitRecipeJobDef.SplitRecipeOutputDef)x$5._1();
        Dataset df = (Dataset)x$5._2();
        MODULE$.saveOutputDataset(dsc$1, (Dataset<Row>)df, output.outputFullName, output.outputPartition, desc$1.writeSchema, output.writeMode);
    }

    /**
     * Lambda body: builds and runs the query for one output, then saves the result.
     * The query selects the output's schema columns from the temp table and filters with the
     * output's {@code sqlWhereClause}, which is appended verbatim.
     */
    public static final /* synthetic */ void $anonfun$run$7(String ENRICHED_INPUT_TEMP_TABLE_NAME$1, SQLContext ssc$1, StdDataikuSparkContext dsc$1, SplitRecipeJobDef desc$1, SplitRecipeJobDef.SplitRecipeOutputDef output) {
        Buffer outputSchema = (Buffer)JavaConverters$.MODULE$.asScalaBufferConverter(output.outputSchema).asScala();
        String selectList = ((TraversableOnce)outputSchema.map((Function1 & Serializable & scala.Serializable)x$6 -> SparkSplitRecipeJob$.quoteIdentifier(((SchemaColumn)x$6).getName()), Buffer$.MODULE$.canBuildFrom())).mkString(", \n");
        String sqlSelectFrom = "SELECT \n" + selectList + "\nFROM `" + ENRICHED_INPUT_TEMP_TABLE_NAME$1 + "`";
        String splitQuery = sqlSelectFrom + "\nWHERE " + output.sqlWhereClause;
        MODULE$.logger().info((Object)("SparkSQL query for building '" + output.outputFullName + "' \n " + splitQuery));
        Dataset df = ssc$1.sql(splitQuery);
        MODULE$.saveOutputDataset(dsc$1, (Dataset<Row>)df, output.outputFullName, output.outputPartition, desc$1.writeSchema, output.writeMode);
    }

    /**
     * Lambda body: writes an empty DataFrame that carries the output's schema, converted
     * column by column with {@code dkuToSpark}.
     */
    public static final /* synthetic */ void $anonfun$run$9(SQLContext ssc$1, SparkContext sc$1, StdDataikuSparkContext dsc$1, SplitRecipeJobDef desc$1, SplitRecipeJobDef.SplitRecipeOutputDef emptyOutput) {
        StructType outputDFSchema = StructType$.MODULE$.apply((Seq)((TraversableLike)JavaConverters$.MODULE$.asScalaBufferConverter(emptyOutput.outputSchema).asScala()).map((Function1 & Serializable & scala.Serializable)c -> package$.MODULE$.dkuToSpark((SchemaColumn)c), Buffer$.MODULE$.canBuildFrom()));
        Dataset df = ssc$1.createDataFrame(sc$1.emptyRDD(ClassTag$.MODULE$.apply(Row.class)), outputDFSchema);
        MODULE$.saveOutputDataset(dsc$1, (Dataset<Row>)df, emptyOutput.outputFullName, emptyOutput.outputPartition, desc$1.writeSchema, emptyOutput.writeMode);
    }

    /**
     * Wraps a column name in backticks for use in a Spark SQL query, doubling any backtick
     * inside the name so it cannot terminate the quoted identifier early.
     * A {@code null} name is rendered as the text {@code null}.
     */
    private static String quoteIdentifier(String name) {
        return "`" + String.valueOf((Object)name).replace("`", "``") + "`";
    }

    /** Registers the singleton, runs the trait initializer and creates the logger. */
    private SparkSplitRecipeJob$() {
        MODULE$ = this;
        SparkSQLLikeJob.$init$(this);
        this.logger = Logger.getLogger((String)"dku.spark.recipe.split");
    }
}

