/*
 * Decompiled with CFR 0.152.
 */
package com.dataiku.dip.dataflow.exec.fuzzyjoin;

import com.dataiku.dip.coremodel.Dataset;
import com.dataiku.dip.coremodel.Schema;
import com.dataiku.dip.coremodel.SchemaColumn;
import com.dataiku.dip.coremodel.SerializedDataset;
import com.dataiku.dip.coremodel.SerializedRecipe;
import com.dataiku.dip.dao.DatasetsDAO;
import com.dataiku.dip.dataflow.JobActivity;
import com.dataiku.dip.dataflow.exec.fuzzyjoin.FuzzyJoinRecipeHelper;
import com.dataiku.dip.dataflow.exec.fuzzyjoin.FuzzyJoinRecipePayloadParams;
import com.dataiku.dip.dataflow.exec.joinlike.ColumnSuggestionWithDistance;
import com.dataiku.dip.dataflow.exec.joinlike.JoinInputDescBase;
import com.dataiku.dip.dataflow.exec.joinlike.JoinLikeRecipeUtils;
import com.dataiku.dip.dataflow.graph.FlowRecipe;
import com.dataiku.dip.datasets.Type;
import com.dataiku.dip.server.recipes.GenericRecipesValidationService;
import com.dataiku.dip.server.recipes.ServiceUtils;
import com.dataiku.dip.server.services.TransactionService;
import com.dataiku.dip.transactions.ifaces.Transaction;
import com.dataiku.dip.utils.ExceptionUtils;
import java.util.Comparator;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

/**
 * Spring service exposing helper operations for the fuzzy join recipe:
 * suggesting matching conditions for a join, and checking the impact of
 * replacing one of the recipe's virtual inputs with another dataset.
 */
@Service
public class FuzzyJoinRecipeService {
    @Autowired
    private GenericRecipesValidationService validationService;
    @Autowired
    private DatasetsDAO datasetsDAO;
    @Autowired
    private TransactionService transactionService;

    /**
     * Computes suggested matching conditions for the last join declared in the recipe payload.
     *
     * <p>The two datasets referenced by that join (through its {@code table1} and
     * {@code table2} virtual input indices) are resolved, and their schemas are handed
     * to the fuzzy-join suggester.
     *
     * @param sr      the serialized recipe
     * @param payload the recipe payload from which the fuzzy join parameters are loaded
     * @return the matching conditions computed by the suggester for the last join
     * @throws IllegalArgumentException if the payload declares no join
     * @throws Exception                if loading the parameters or resolving the datasets fails
     */
    public List<FuzzyJoinRecipePayloadParams.MatchingCondition> suggestJoinConditions(SerializedRecipe sr, String payload) throws Exception {
        // NOTE(review): unlike testVirtualInputReplacement, no read transaction is opened here;
        // presumably the caller already holds one - confirm.
        FlowRecipe fr = new FlowRecipe(sr);
        JobActivity activity = new JobActivity(this.validationService.getSampleSubgraph(fr));
        FuzzyJoinRecipePayloadParams params = new FuzzyJoinRecipeHelper().loadParams(payload, sr);
        // Fail with an explicit message instead of an opaque index -1 / null dereference.
        if (params.joins == null || params.joins.isEmpty()) {
            throw new IllegalArgumentException("Cannot suggest join conditions: the recipe payload does not define any join");
        }
        // Suggestions are always computed for the most recently declared join.
        FuzzyJoinRecipePayloadParams.JoinDesc join = (FuzzyJoinRecipePayloadParams.JoinDesc)params.joins.get(params.joins.size() - 1);
        String datasetFullName1 = ((JoinInputDescBase)params.virtualInputs.get((int)join.table1)).name;
        String datasetFullName2 = ((JoinInputDescBase)params.virtualInputs.get((int)join.table2)).name;
        Dataset dataset1 = ServiceUtils.getDataset(activity, this.datasetsDAO, datasetFullName1);
        Dataset dataset2 = ServiceUtils.getDataset(activity, this.datasetsDAO, datasetFullName2);
        Schema schema1 = dataset1.getSchema();
        Schema schema2 = dataset2.getSchema();
        return new FuzzyJoinConditionsSuggester().computeSuggestions(join, schema1, schema2);
    }

    /**
     * Tests what would happen if the virtual input at {@code virtualInputIndex} were
     * replaced by {@code newInput}.
     *
     * <p>The recipe parameters are loaded and the input datasets initialized inside a read
     * transaction; failures in that phase propagate to the caller. The schema checks that
     * follow are best-effort: any exception they raise is reported as a warning on the
     * returned result instead of being thrown.
     *
     * @param sr                the serialized recipe
     * @param payload           the recipe payload from which the fuzzy join parameters are loaded
     * @param virtualInputIndex index of the virtual input being replaced
     * @param newInput          the candidate replacement dataset
     * @return the result object carrying the warnings produced by the checks
     * @throws Exception if loading the parameters or initializing the input datasets fails
     */
    public JoinLikeRecipeUtils.InputReplacementTestResult testVirtualInputReplacement(SerializedRecipe sr, String payload, int virtualInputIndex, SerializedDataset newInput) throws Exception {
        FuzzyJoinRecipePayloadParams params;
        FuzzyJoinRecipeHelper helper = new FuzzyJoinRecipeHelper();
        // The transaction handle is only needed for its scope; it is closed automatically.
        try (Transaction t = this.transactionService.beginRead();){
            JobActivity activity = new JobActivity(this.validationService.getSampleSubgraph(new FlowRecipe(sr)));
            params = helper.loadParams(payload, sr);
            helper.initInputDatasets(activity, params);
        }
        JoinLikeRecipeUtils.InputReplacementTestResult ret = new JoinLikeRecipeUtils.InputReplacementTestResult();
        try {
            Schema newSchema = newInput.getSchema();
            if (JoinLikeRecipeUtils.checkSchemaEmptiness(newSchema)) {
                // Nothing further can be checked against an empty schema.
                ret.warn("Dataset schema is empty");
                return ret;
            }
            JoinLikeRecipeUtils.checkJoinColumns(virtualInputIndex, params.joins, ret, newSchema);
            JoinLikeRecipeUtils.checkSelectedColumns(virtualInputIndex, params.getSelectedColumns(), ret, newSchema);
        }
        catch (Exception e) {
            // Deliberately best-effort: surface the failure as a warning rather than aborting.
            ret.warn("Failed to complete check: " + ExceptionUtils.getMessageWithCauses((Throwable)e));
        }
        return ret;
    }

    /**
     * Join-condition suggester specialized for fuzzy joins: it picks a distance type
     * from the types of the two columns, and favors column pairs for which a
     * non-exact distance type was suggested.
     */
    private static class FuzzyJoinConditionsSuggester
    extends JoinLikeRecipeUtils.JoinConditionsSuggester<FuzzyJoinRecipePayloadParams.MatchingCondition, FuzzyJoinRecipePayloadParams.JoinDesc> {
        /** Divisor applied to the distance of suggestions whose distance type is not EXACT. */
        private static final double COMPARABLE_TYPE_BONUS = 1.5;

        private FuzzyJoinConditionsSuggester() {
        }

        /**
         * Builds the parent's suggestions, divides the distance of every suggestion whose
         * matching condition uses a non-EXACT distance type by {@link #COMPARABLE_TYPE_BONUS},
         * and returns the suggestions sorted by ascending distance.
         *
         * @param schema1 schema of the first joined dataset
         * @param schema2 schema of the second joined dataset
         * @return a new list of the suggestions, ordered by ascending distance
         */
        @Override
        protected List<ColumnSuggestionWithDistance<FuzzyJoinRecipePayloadParams.MatchingCondition>> buildColumnSuggestionsWithDistance(Schema schema1, Schema schema2) {
            List<ColumnSuggestionWithDistance<FuzzyJoinRecipePayloadParams.MatchingCondition>> suggestions = super.buildColumnSuggestionsWithDistance(schema1, schema2);
            // Explicit loop rather than Stream.peek: the distance adjustment is a real side
            // effect, not a debugging hook, and must run for every element before sorting.
            for (ColumnSuggestionWithDistance<FuzzyJoinRecipePayloadParams.MatchingCondition> suggestion : suggestions) {
                if (!suggestion.suggestedMatchDesc.isPresent()) {
                    continue;
                }
                FuzzyJoinRecipePayloadParams.MatchingCondition matchingCondition = (FuzzyJoinRecipePayloadParams.MatchingCondition)suggestion.suggestedMatchDesc.get();
                FuzzyJoinRecipePayloadParams.FuzzyMatchDesc fuzzyMatchDesc = matchingCondition.fuzzyMatchDesc;
                if (fuzzyMatchDesc.distanceType != FuzzyJoinRecipePayloadParams.DistanceType.EXACT) {
                    suggestion.distance /= COMPARABLE_TYPE_BONUS;
                }
            }
            // Sort into a fresh list; the list returned by the parent is left in its original order.
            return suggestions.stream().sorted(Comparator.comparingDouble(o -> o.distance)).collect(Collectors.toList());
        }

        /**
         * Suggests a matching condition for a pair of columns based on their types:
         * EUCLIDEAN when both are numeric, LEVENSHTEIN when both are strings, GEO when
         * both are geopoints (each with a threshold argument of 1.0), and EXACT (with 0.0)
         * otherwise.
         *
         * @param col1 column from the first schema, may be {@code null}
         * @param col2 column from the second schema, may be {@code null}
         * @return the suggested matching condition, or an empty Optional if either column is {@code null}
         */
        @Override
        protected Optional<FuzzyJoinRecipePayloadParams.MatchingCondition> suggestedMatchDesc(SchemaColumn col1, SchemaColumn col2) {
            if (col1 == null || col2 == null) {
                return Optional.empty();
            }
            if (col1.getType().isNumeric() && col2.getType().isNumeric()) {
                return Optional.of(new FuzzyJoinRecipePayloadParams.MatchingCondition(new FuzzyJoinRecipePayloadParams.FuzzyMatchDesc(FuzzyJoinRecipePayloadParams.DistanceType.EUCLIDEAN, 1.0)));
            }
            if (col1.getType() == Type.STRING && col2.getType() == Type.STRING) {
                return Optional.of(new FuzzyJoinRecipePayloadParams.MatchingCondition(new FuzzyJoinRecipePayloadParams.FuzzyMatchDesc(FuzzyJoinRecipePayloadParams.DistanceType.LEVENSHTEIN, 1.0)));
            }
            if (col1.getType() == Type.GEOPOINT && col2.getType() == Type.GEOPOINT) {
                return Optional.of(new FuzzyJoinRecipePayloadParams.MatchingCondition(new FuzzyJoinRecipePayloadParams.FuzzyMatchDesc(FuzzyJoinRecipePayloadParams.DistanceType.GEO, 1.0)));
            }
            // Types are not comparable with a fuzzy distance: fall back to exact matching.
            return Optional.of(new FuzzyJoinRecipePayloadParams.MatchingCondition(new FuzzyJoinRecipePayloadParams.FuzzyMatchDesc(FuzzyJoinRecipePayloadParams.DistanceType.EXACT, 0.0)));
        }
    }
}

