import copy
import logging
from numbers import Number

import numpy as np
import pandas as pd
from scipy.stats import rv_discrete
from scipy.stats.distributions import randint
from scipy.stats.distributions import reciprocal
from scipy.stats.distributions import uniform
import sklearn
from sklearn import clone
from sklearn import model_selection
from sklearn.base import is_classifier, ClassifierMixin
from sklearn.metrics import accuracy_score
from sklearn.metrics import check_scoring
from sklearn.metrics import confusion_matrix
from sklearn.metrics import explained_variance_score
from sklearn.metrics import f1_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import precision_score
from sklearn.metrics import r2_score
from sklearn.metrics import recall_score
from sklearn.model_selection import ParameterSampler

from dataiku.base.utils import package_is_at_least
from dataiku.base.utils import safe_unicode_str
from dataiku.core import dkujson
from dataiku.core import doctor_constants
from dataiku.core import intercom
from dataiku.doctor.crossval.search_runner import ClassicalSearchRunner
from dataiku.doctor.crossval.search_runner import SearchSettings
from dataiku.doctor.crossval.strategies.bayesian_search_strategy import BayesianSearchStrategy
from dataiku.doctor.crossval.strategies.grid_search_strategy import GridSearchStrategy
from dataiku.doctor.crossval.strategies.random_search_strategy import RandomSearchStrategy
from dataiku.doctor.prediction.custom_scoring import get_custom_evaluation_metric_scorefunc
from dataiku.doctor.prediction.custom_scoring import get_custom_evaluation_metric
from dataiku.doctor.prediction.metric import BINARY_METRICS_NAME_TO_FIELD_NAME
from dataiku.doctor.prediction.metric import MULTICLASS_METRICS_NAME_TO_FIELD_NAME
from dataiku.doctor.prediction.metric import REGRESSION_METRICS_NAME_TO_FIELD_NAME
from dataiku.doctor.sparse import prepare_multiframe_with_sparse_support, AlgorithmSparseSupport, CSRSupport
from dataiku.doctor.utils.crossval import DKUSortedSingleSplit
from dataiku.doctor.utils.estimator import set_column_labels
from dataiku.doctor.utils.metrics import log_loss
from dataiku.doctor.utils.metrics import mean_absolute_percentage_error
from dataiku.doctor.utils.metrics import mroc_auc_score
from dataiku.doctor.utils.metrics import m_average_precision_score
from dataiku.doctor.utils.metrics import rmse_score
from dataiku.doctor.utils.metrics import rmsle_score
from dataiku.doctor.utils.skcompat import gbt_skcompat_hp_space
from dataiku.doctor.utils.skcompat import instantiate_stratified_group_kfold
from dataiku.doctor.utils.skcompat import SQUARED_LOSS_NAME
from dataiku.doctor.utils.skcompat import sgd_skcompat_hp_space
from dataiku.doctor.utils.skcompat import make_scorer

logger = logging.getLogger(__name__)

def greater_is_better(metric, custom_evaluation_metric_gib=None):
    """Tell whether a higher value of `metric` means a better model.

    :param metric: name of the metric (e.g. "MAE"), or "CUSTOM"
    :param custom_evaluation_metric_gib: "greater is better" flag of the custom
        metric; only read (and then mandatory) when `metric` is "CUSTOM"
    :return: the custom flag as-is for "CUSTOM"; otherwise False for the error/loss
        metrics listed below and True for any other metric name
    :raises ValueError: if `metric` is "CUSTOM" and no flag was provided
    """
    if metric != "CUSTOM":
        # Error / loss metrics: the smaller, the better
        return metric not in ('MAE', 'MSE', 'RMSE', 'RMSLE', 'LOG_LOSS', 'MAPE')

    if custom_evaluation_metric_gib is None:
        raise ValueError("No custom metric gib flag passed in input")
    return custom_evaluation_metric_gib

class HyperparameterDimension(object):
    """Base class of a single hyperparameter dimension, wrapping its raw definition."""

    def __init__(self, dimension_definition):
        # Raw definition of the dimension, stored as-is and interpreted by subclasses
        self.dimension_definition = dimension_definition

    def __str__(self):
        """Render as `<ClassName>(<definition>)`."""
        return "{}({})".format(type(self).__name__, str(self.dimension_definition))

    def build(self, strategy):
        """Build the dimension for the given search strategy; must be overridden."""
        raise NotImplementedError("build method must be implemented in "
                                  "children of HyperparameterDimension")


class NumericalHyperparameterDimension(HyperparameterDimension):
    """
        Base class of numerical dimensions.

        A dimension is searched either over an explicit list of values ("EXPLICIT" mode)
        or over a range ("RANGE" mode). Subclasses provide `build_grid` and
        `build_marginal_distribution` for the "RANGE" mode.
    """

    def _get_mode(self, strategy):
        """Return the search mode configured for `strategy` (None for an unknown strategy)."""
        definition = self.dimension_definition
        if strategy == "GRID":
            return definition["gridMode"]
        if strategy in {"RANDOM", "BAYESIAN"}:
            return definition["randomMode"]
        return None

    def _get_range(self):
        """Return the "range" entry of the definition, raising ValueError when absent."""
        if "range" in self.dimension_definition:
            return self.dimension_definition["range"]
        raise ValueError("Numerical dimension must have a 'range' parameter")

    def _get_values(self):
        """Return the explicit values of the definition (empty list when absent)."""
        return self.dimension_definition.get("values", [])

    def build_grid(self, a, b, n, scaling):
        """Build the list of grid values over a range; to be implemented by subclasses."""
        raise NotImplementedError()

    def build_marginal_distribution(self, a, b, scaling, strategy):
        """Build the sampling distribution over a range; to be implemented by subclasses."""
        raise NotImplementedError()

    def build(self, strategy):
        """
            Build the dimension for the given strategy ("GRID", "RANDOM" or "BAYESIAN").

            In "EXPLICIT" mode, returns the explicit values (wrapped in a skopt
            Categorical for "BAYESIAN"). In "RANGE" mode, delegates to `build_grid`
            for "GRID" and to `build_marginal_distribution` otherwise.
            Raises ValueError for any other mode.
        """
        mode = self._get_mode(strategy)

        if mode == "EXPLICIT":
            if strategy == "BAYESIAN":
                from skopt.space import Categorical
                return Categorical(self._get_values())
            if strategy in {"GRID", "RANDOM"}:
                return self._get_values()
            raise ValueError()

        if mode != "RANGE":
            raise ValueError("unknown search mode {} for strategy {}".format(mode, strategy))

        bounds = self._get_range()
        lower = bounds["min"]
        upper = bounds["max"]
        scaling = bounds["scaling"]
        if strategy == "GRID":
            return self.build_grid(lower, upper, bounds["nbValues"], scaling)
        return self.build_marginal_distribution(lower, upper, scaling, strategy)


class FloatHyperparameterDimension(NumericalHyperparameterDimension):
    """Numerical hyperparameter dimension taking real values."""

    def build_grid(self, a, b, n, scaling):
        """
            Build a grid of `n` values from `a` to `b`, both inclusive.

            :param a: lower bound
            :param b: upper bound
            :param n: number of grid points
            :param scaling: "LINEAR" (evenly spaced) or "LOGARITHMIC" (evenly spaced in log-space)
            :return: list of grid values; `[a]` when both bounds are equal or when a
                     single point is requested
            :raises ValueError: if the bounds differ and the scaling is unknown
        """
        if a == b:
            return [a]
        if scaling not in ("LINEAR", "LOGARITHMIC"):
            # Fail loudly instead of silently returning None as a grid
            raise ValueError("Dimension scaling = {}".format(scaling))
        if n == 1:
            # The step below divides by (n - 1); a one-point grid is the lower bound,
            # which is also what numpy.linspace(a, b, 1) returns
            return [a]
        if scaling == "LINEAR":
            return [a + (b - a) * i / (n - 1) for i in range(n)]
        loga = np.log(a)
        logb = np.log(b)
        return [np.exp(loga + (logb - loga) * i / (n - 1)) for i in range(n)]

    def build_marginal_distribution(self, a, b, scaling, strategy):
        """
            Build the distribution to sample from between `a` and `b`.

            :param a: lower bound
            :param b: upper bound
            :param scaling: "LINEAR" or "LOGARITHMIC"
            :param strategy: "RANDOM" (scipy distribution, or `[a]` when both bounds are
                             equal) or "BAYESIAN" (skopt `Real` dimension)
            :raises ValueError: on an unknown scaling or strategy
        """
        if strategy == "RANDOM":
            if a == b:
                return [a]
            elif scaling == "LINEAR":
                dist = uniform(a, b - a)  # scipy's uniform takes (loc, scale), not (low, high)
            elif scaling == "LOGARITHMIC":
                dist = reciprocal(a, b)
            else:
                raise ValueError("Dimension scaling")
        elif strategy == "BAYESIAN":
            from skopt.space import Real
            # NOTE(review): unlike RANDOM, equal bounds are not special-cased here;
            # confirm how skopt behaves when a == b
            if scaling == "LINEAR":
                dist = Real(a, b)
            elif scaling == "LOGARITHMIC":
                dist = Real(a, b, prior="log-uniform")
            else:
                raise ValueError("Dimension scaling")
        else:
            raise ValueError("Search strategy")
        return dist


class IntegerHyperparameterDimension(NumericalHyperparameterDimension):
    """Numerical hyperparameter dimension taking integer values."""

    def build_grid(self, low, high, n, scaling):
        """
            Building grid of dimension depending on strategy.
            Both low and high are inclusive

            :param low: lower bound
            :param high: upper bound
            :param n: number of grid points requested; fewer may be returned when the
                      range holds fewer integers or when points collapse after truncation
            :param scaling: "LINEAR" or "LOGARITHMIC"
            :return: list of integers
            :raises ValueError: on an unknown scaling
        """
        if scaling == "LINEAR":
            if high - low < n:
                # Not more integers in the range than requested points: enumerate them all
                return list(range(low, high + 1))
            if n == 1:
                # The step below divides by (n - 1)
                return [low]
            return [int(low + (high - low) * i / (n - 1)) for i in range(n)]
        elif scaling == "LOGARITHMIC":
            log_low = np.log(low)
            log_high = np.log(high)

            def grid_point(i):
                # Pin both ends to the exact bounds: exp(log(x)) may land slightly below x,
                # in which case int() truncation would drop the (inclusive) bound.
                # This also avoids dividing by (n - 1) when a single point is requested.
                if i == 0:
                    return int(low)
                if i == n - 1:
                    return int(high)
                return int(np.exp(log_low + (log_high - log_low) * i / (n - 1)))

            # Truncation can map several points to the same integer: de-duplicate and sort
            return sorted({grid_point(i) for i in range(n)})
        # Fail loudly instead of silently returning None as a grid
        raise ValueError("Dimension scaling = {}".format(scaling))

    def build_marginal_distribution(self, low, high, scaling, strategy):
        """
            Building marginal distribution of dimension depending on strategy.
            Both low and high are inclusive

            :param scaling: "LINEAR" or "LOGARITHMIC" ("LOGARITHMIC" is not implemented
                            for the "RANDOM" strategy)
            :param strategy: "RANDOM" (scipy distribution, or `[low]` when both bounds are
                             equal) or "BAYESIAN" (skopt `Integer` dimension)
            :raises NotImplementedError: for "RANDOM" with "LOGARITHMIC" scaling
            :raises ValueError: on an unknown scaling or strategy
        """
        if strategy == "RANDOM":
            if low == high:
                return [low]
            elif scaling == "LINEAR":
                dist = randint(low, high + 1)  # upper bound is exclusive in randint
            elif scaling == "LOGARITHMIC":
                raise NotImplementedError("Dimension scaling = {}".format(scaling))
            else:
                raise ValueError("Dimension scaling = {}".format(scaling))
        elif strategy == "BAYESIAN":
            from skopt.space import Integer
            if scaling == "LINEAR":
                dist = Integer(low, high, dtype=int)
            elif scaling == "LOGARITHMIC":
                dist = Integer(low, high, prior="log-uniform", dtype=int)  # Added in skopt 0.7
            else:
                raise ValueError("Dimension scaling = {}".format(scaling))
        else:
            raise ValueError("Search strategy = {}".format(strategy))
        return dist


class OddIntegerHyperparameterDimension(IntegerHyperparameterDimension):
    """Integer hyperparameter dimension restricted to odd values."""

    def _get_values(self):
        "Keep only odd numbers for explicit values"
        return [n for n in super(OddIntegerHyperparameterDimension, self)._get_values() if n % 2 == 1]

    def build_grid(self, low, high, n, scaling):
        """
            Both low and high are inclusive if they are odd

            :param low: lower bound (moved up to the next odd integer when even)
            :param high: upper bound (moved down to the previous odd integer when even)
            :param n: number of grid points requested; fewer may be returned
            :param scaling: "LINEAR" or "LOGARITHMIC"
            :return: list of odd integers within the bounds
            :raises ValueError: if the range is a single even value, or on an unknown scaling
        """
        if low == high and low % 2 == 0:
            raise ValueError("Cannot create a grid with a single non-odd value for a parameter that only supports odd integers")
        low = low + 1 - (low % 2)  # add one if low is even
        # Remove one if high is even. This must hold for every scaling: the
        # `// 2 * 2 + 1` step below maps an even value to the odd integer just above it,
        # so an even upper bound would otherwise produce high + 1
        high = high - 1 + (high % 2)
        if scaling == "LINEAR":
            if (high - low) / 2 < n:
                # Not more odd integers in the range than requested points: enumerate them all
                return list(range(low, high + 1, 2))
            if n == 1:
                # The step below divides by (n - 1)
                return [low]
            # odd values are kept, even values are moved to the next odd integer
            return [int(low + (high - low) * i / (n - 1)) // 2 * 2 + 1 for i in range(n)]
        elif scaling == "LOGARITHMIC":
            if n == 1:
                # The step below divides by (n - 1)
                return [low]
            log_low = np.log(low)
            log_high = np.log(high)
            return list(sorted(set([int(np.exp(log_low + (log_high - log_low) * i / (n - 1))) // 2 * 2 + 1 for i in range(n)])))
        # Fail loudly instead of silently returning None as a grid
        raise ValueError("Dimension scaling = {}".format(scaling))

    def build_marginal_distribution(self, low, high, scaling, strategy):
        """
            Building marginal distribution of dimension depending on strategy.
            Both low and high are inclusive

            Only "LINEAR" scaling is implemented: "RANDOM" gets a uniform discrete
            distribution over the odd integers of the range, "BAYESIAN" gets a skopt
            Categorical over the same values.

            :raises ValueError: on an unknown strategy, or if the range is a single even value
            :raises NotImplementedError: for any scaling other than "LINEAR"
        """
        if strategy not in ["RANDOM", "BAYESIAN"]:
            raise ValueError("Search strategy = {}".format(strategy))
        if low == high and low % 2 == 0:
            raise ValueError("Cannot do {} search with a single non-odd value for a parameter that only supports odd integers".format(strategy))
        if scaling != "LINEAR":
            raise NotImplementedError("Dimension scaling {} not implemented for odd integers".format(scaling))

        low = low + 1 - (low % 2)  # add one if low is even
        values = list(range(low, high + 1, 2))
        if strategy == "RANDOM":
            probabilities = np.ones(len(values)) / len(values)
            return rv_discrete(values=(values, probabilities))

        from skopt.space import Categorical
        logger.warning("Doing BAYESIAN search on an odd integer dimension using a Scikit-Optimize categorical search space")
        return Categorical(values)


class CategoricalHyperparameterDimension(HyperparameterDimension):
    """
        Dimension made of named values, each one carrying an "enabled" flag:
        {"values": {<name>: {"enabled": <bool>, ...}, ...}}
    """

    def _get_values(self):
        """Return the "values" mapping of the definition (empty dict when absent)."""
        return self.dimension_definition.get("values", {})

    def _get_enabled_values_list(self):
        """Return the names of the values whose "enabled" flag is truthy."""
        return [name for (name, val) in self._get_values().items() if val.get("enabled")]

    def build(self, strategy):
        """
            Build the dimension for the given strategy.

            :return: the list of enabled value names for "GRID" and "RANDOM", the same
                     list wrapped in a skopt Categorical for "BAYESIAN"
            :raises ValueError: on an unknown strategy
        """
        if strategy in {"GRID", "RANDOM"}:
            return self._get_enabled_values_list()
        elif strategy == "BAYESIAN":
            from skopt.space import Categorical
            return Categorical(self._get_enabled_values_list())
        else:
            # Same message format as the numerical dimensions, so the failure is explicit
            raise ValueError("Search strategy = {}".format(strategy))

    def set_values(self, values):
        """Replace the "values" mapping of the definition."""
        self.dimension_definition["values"] = values


class HyperparametersSpace(object):
    """Named collection of hyperparameter dimensions, buildable for a given search strategy."""

    def __init__(self, space_definition):
        self.random_state = None
        # Mapping: hyperparameter name -> dimension object exposing `build(strategy)`
        self.space_definition = space_definition

    def set_random_state(self, random_state):
        self.random_state = random_state

    @classmethod
    def from_definition(
            cls,
            input_hp_space,
            hp_names_to_dimension_class=None,
            hp_names_to_dimension=None,
            constructor_args=None
    ):
        """
            Alternate constructor.

            :param input_hp_space: raw definitions, indexed by hyperparameter name
            :param hp_names_to_dimension_class: name -> dimension class, instantiated with
                                                `input_hp_space[name]`
            :param hp_names_to_dimension: name -> already built dimension; wins over the
                                          previous mapping when a name is in both
            :param constructor_args: extra keyword arguments forwarded to the constructor
        """
        dimension_classes = {} if hp_names_to_dimension_class is None else hp_names_to_dimension_class
        space_definition = {
            name: dimension_class(input_hp_space[name])
            for name, dimension_class in dimension_classes.items()
        }
        if hp_names_to_dimension is not None:
            space_definition.update(hp_names_to_dimension)

        return cls(space_definition, **(constructor_args or {}))

    @staticmethod
    def enrich_hyperparam_point(point):
        """
            Hook applied to every drawn point. The default implementation returns it untouched.
            Subclasses may override it, e.g. to derive min_samples_split from min_samples_leaf,
            and may return None to flag an invalid point (to be handled by the caller)
        """
        return point

    def build_space(self, strategy):
        """Build every dimension for `strategy`; returns a dict name -> built dimension."""
        return {name: dimension.build(strategy)
                for name, dimension in self.space_definition.items()}

    def _get_parameter_sampler(self, n_iter):
        return ParameterSampler(self.build_space("RANDOM"), n_iter, random_state=self.random_state)

    def get_random_parameters(self, n_samples_max):
        """
        Lazily yields at most n_samples_max random hyperparameter points: samples
        for which `enrich_hyperparam_point` returns None are dropped
        """
        for candidate in self._get_parameter_sampler(n_samples_max):
            enriched = self.enrich_hyperparam_point(candidate)
            if enriched is None:
                continue
            yield enriched

    def get_optimizer(self):
        return DkuOptimizer(self)


class DkuAbstractOptimizer(object):
    """Interface of the optimizers defined in this module: `ask` for points, `tell` the scores back."""

    def ask(self, n_samples=None):
        """Return hyperparameter points to evaluate; must be overridden."""
        raise NotImplementedError()

    def tell(self, params, score):
        """Report the score obtained for evaluated points; must be overridden."""
        raise NotImplementedError()


class DkuOptimizer(DkuAbstractOptimizer):

    """
        Wrapper of skopt.Optimizer to work with DSS HyperparametersSpace.

        Provides:
         * `ask` method to retrieve new hp points to test
         * `tell` method to give back results for those points and update the optimizer

        Note: - calling multiple times ask(n_samples) without providing feedback via tell in between calls will
              yield the same samples
              - it does not support invalid points (i.e. when enrich_hyperparam_point returns None)

    """

    def __init__(self, hyperparameter_space):
        from skopt import Optimizer

        self.space = hyperparameter_space
        bayesian_space = hyperparameter_space.build_space("BAYESIAN")
        self.distribution = bayesian_space
        # Names and dimensions are both taken from the same dict, hence in matching order
        self.optimized_features = bayesian_space.keys()
        self.optimized_dimensions = bayesian_space.values()
        self.__optimizer = Optimizer(self.optimized_dimensions, random_state=hyperparameter_space.random_state)

    def ask(self, n_samples=None):
        """Lazily yield `n_samples` enriched points (1 by default); ValueError on an invalid point."""
        batch_size = 1 if n_samples is None else n_samples

        for raw_values in self.__optimizer.ask(batch_size):
            # skopt returns bare value lists: re-attach the hyperparameter names
            candidate = dict(zip(self.optimized_features, raw_values))
            enriched = self.space.enrich_hyperparam_point(candidate)
            if enriched is None:
                raise ValueError("An invalid point was found during Bayesian optimisation: {}".format(candidate))
            yield enriched

    def tell(self, params_list, scores):
        """Feed back scores; only the optimized hyperparameters of each point are passed to skopt."""
        feature_names = self.optimized_features
        rows = []
        for params in params_list:
            rows.append([params[name] for name in feature_names])
        self.__optimizer.tell(rows, scores)


class SGDRegressionOptimizer(DkuAbstractOptimizer):
    """
        Bayesian optimizer for SGD regression, made of two independent optimizers:
        one for "epsilon" alone, one for all the other hyperparameters.
    """

    def __init__(self, sgd_reg_hyperparameters_space):
        from skopt import Optimizer

        # Work on a copy: "epsilon" is removed from the space handed to the main optimizer
        reduced_space = copy.deepcopy(sgd_reg_hyperparameters_space)
        epsilon_dimension = reduced_space.space_definition.pop("epsilon")

        self.__epsilon_optimizer = Optimizer([epsilon_dimension.build("BAYESIAN")], random_state=reduced_space.random_state)
        self.__other_optimizer = DkuOptimizer(reduced_space)

    def ask(self, n_samples=None):
        """Yield points from the main optimizer, adding an "epsilon" draw to the "huber" ones."""
        for candidate in self.__other_optimizer.ask(n_samples):
            if candidate.get("loss") == "huber":
                candidate["epsilon"] = self.__epsilon_optimizer.ask()[0]
            yield candidate

    def tell(self, params_list, scores):
        """Feed every point to the main optimizer, and those holding "epsilon" to the epsilon one."""
        all_points, all_scores = [], []
        epsilon_points, epsilon_scores = [], []

        for position, params in enumerate(params_list):
            score = scores[position]
            if "epsilon" in params:
                epsilon_points.append([params["epsilon"]])
                epsilon_scores.append(score)
            all_points.append(params)
            all_scores.append(score)

        if all_points:
            self.__other_optimizer.tell(all_points, all_scores)

        if epsilon_points:
            self.__epsilon_optimizer.tell(epsilon_points, epsilon_scores)


class GridHyperparametersSpace(HyperparametersSpace):
    """Space defined directly by a ready-made grid; only the "GRID" strategy is supported."""

    def __init__(self, grid=None):
        super(GridHyperparametersSpace, self).__init__(None)
        self.grid = {} if grid is None else grid

    def build_space(self, strategy):
        if strategy != "GRID":
            raise NotImplementedError("Other strategies than Grid search are not implemented yet")
        return self.build_grid()

    def build_grid(self):
        """Return the grid given at construction (an empty dict when none was given)."""
        return self.grid


class TreesHyperparametersSpace(HyperparametersSpace):
    """Space whose points get "min_samples_split" derived from "min_samples_leaf"."""

    @staticmethod
    def enrich_hyperparam_point(point):
        """Set min_samples_split to 3 times min_samples_leaf (in place) when the latter is set."""
        leaf_size = point.get("min_samples_leaf")
        if leaf_size is None:
            return point
        point["min_samples_split"] = leaf_size * 3
        return point


class SGDRegressionHyperparametersSpace(HyperparametersSpace):
    """
        Special class to handle SGD Regression when both "huber" and the squared loss are enabled.
        In that case, when "huber" loss is selected, a new numerical hyperparameter ("epsilon") is available.
        This means that:
         * for grid-search: we need two grids:
             - a first one with only the squared loss and no "epsilon"
             - a second one with only "huber", including "epsilon"
         * for random search, we draw all the hyperparameters (including "epsilon") and remove it
           afterwards in `enrich_hyperparam_point` when the squared loss was drawn
         * for bayesian search, we assume independence between variables and hold two optimizers:
            - one with all hp except "epsilon"
            - one with only "epsilon", used only when "huber" is drawn from the other
    """

    @staticmethod
    def enrich_hyperparam_point(point):
        """
            Drop "epsilon" (in place) from points using the squared loss.

            The squared loss is recognized both under its historical "squared_loss" name
            and under SQUARED_LOSS_NAME, which is the name used to build the grid.
        """
        if point.get("loss") in ("squared_loss", SQUARED_LOSS_NAME) and point.get("epsilon") is not None:
            del point["epsilon"]
        return point

    def build_space(self, strategy):
        if strategy == "GRID":
            return self.build_grid()
        else:
            return super(SGDRegressionHyperparametersSpace, self).build_space(strategy)

    def build_grid(self):
        """Return the two grids: squared loss without "epsilon", then "huber" with "epsilon"."""
        return [
            {
                "alpha": self.space_definition["alpha"].build("GRID"),
                "penalty": self.space_definition["penalty"].build("GRID"),
                "loss": [SQUARED_LOSS_NAME]
            },
            {
                "alpha": self.space_definition["alpha"].build("GRID"),
                "penalty": self.space_definition["penalty"].build("GRID"),
                "loss": ["huber"],
                "epsilon": self.space_definition["epsilon"].build("GRID")
            }]

    def get_optimizer(self):
        return SGDRegressionOptimizer(self)


def get_svm_gamma_params_from_clf_params(clf_params):
    """
        Extract the gamma-related parameters from SVM classifier params.

        :param clf_params: dict of classifier params
        :return: {} for a "linear" kernel or when there is no "gamma";
                 {"gamma": "custom", "custom_gamma": <value>} for a numerical gamma;
                 {"gamma": <value>} otherwise
    """
    # always discard "gamma" for "linear" kernel, in order not to return the default "gamma" value of the clf
    if clf_params.get("kernel") == "linear" or "gamma" not in clf_params:
        return {}

    gamma = clf_params["gamma"]
    if isinstance(gamma, Number):  # custom values
        return {"gamma": "custom", "custom_gamma": gamma}
    return {"gamma": gamma}


class SVMOptimizer(DkuAbstractOptimizer):
    """
        Bayesian optimizer for SVM, made of three independent optimizers: one for "gamma",
        one for "custom_gamma", and one for all the other hyperparameters.
    """

    def __init__(self, svm_hyperparameters_space):
        from skopt import Optimizer

        # Work on a copy: both gamma dimensions are removed from the space handed to the main optimizer
        reduced_space = copy.deepcopy(svm_hyperparameters_space)
        seed = reduced_space.random_state

        gamma_dimension = reduced_space.space_definition.pop("gamma")
        self.__gamma_optimizer = Optimizer([gamma_dimension.build("BAYESIAN")], random_state=seed)

        custom_gamma_dimension = reduced_space.space_definition.pop("custom_gamma")
        self.__custom_gamma_optimizer = Optimizer([custom_gamma_dimension.build("BAYESIAN")], random_state=seed)

        self.__other_optimizer = DkuOptimizer(reduced_space)

    def ask(self, n_samples=None):
        """Yield points from the main optimizer, adding a gamma draw unless the kernel is "linear"."""
        for candidate in self.__other_optimizer.ask(n_samples):
            if candidate.get("kernel") != "linear":
                gamma = self.__gamma_optimizer.ask()[0]
                if gamma == "custom":
                    # "custom" is a placeholder: the actual value comes from the dedicated optimizer
                    gamma = self.__custom_gamma_optimizer.ask()[0]
                candidate["gamma"] = gamma

            yield candidate

    def tell(self, params_list, scores):
        """Dispatch the scores to the main, custom gamma and gamma optimizers."""
        all_points, all_scores = [], []
        gamma_points, gamma_scores = [], []
        custom_points, custom_scores = [], []

        for position, params in enumerate(params_list):
            score = scores[position]
            gamma_params = get_svm_gamma_params_from_clf_params(params)
            if "gamma" in gamma_params:
                gamma_points.append([gamma_params["gamma"]])
                gamma_scores.append(score)
            if "custom_gamma" in gamma_params:  # custom value
                custom_points.append([gamma_params["custom_gamma"]])
                custom_scores.append(score)

            all_points.append(params)
            all_scores.append(score)

        if all_points:
            self.__other_optimizer.tell(all_points, all_scores)

        if custom_points:
            self.__custom_gamma_optimizer.tell(custom_points, custom_scores)

        if gamma_points:
            self.__gamma_optimizer.tell(gamma_points, gamma_scores)


class SVMHyperparametersSpace(HyperparametersSpace):
    """
        Space for SVM classification/regression with kernels that may use gamma.

         * the "linear" kernel never gets a gamma
         * for the other kernels, "gamma" is either a named value ("auto"/"scale") or "custom",
           in which case the actual value is taken from "custom_gamma"

        Per strategy:
         * grid search: one grid for "linear" (when enabled) without gamma, and one grid for the
           other kernels where the "custom_gamma" values are merged with "auto" and/or "scale"
         * random search: everything is drawn (including "gamma" and "custom_gamma"), then
           `enrich_hyperparam_point` discards or resolves gamma
         * bayesian search: handled by SVMOptimizer
    """

    @staticmethod
    def enrich_hyperparam_point(point):
        """Resolve gamma in place: none for "linear", the custom value when gamma is "custom"."""
        if point.get("kernel") == "linear":
            point.pop("gamma", None)
        elif point.get("gamma") == "custom":
            if "custom_gamma" in point:
                point["gamma"] = point["custom_gamma"]
            else:  # should not happen
                del point["gamma"]

        # Always delete custom_gamma as it's not a proper SVC (or SVM) param
        point.pop("custom_gamma", None)
        return point

    def build_space(self, strategy):
        if strategy != "GRID":
            return super(SVMHyperparametersSpace, self).build_space(strategy)
        return self.build_grid()

    def build_grid(self):
        """Return the list of grids: the "linear" one (when enabled), then the gamma-aware one."""
        kernel_dimension = self.space_definition["kernel"]
        grids = []

        if kernel_dimension.dimension_definition["values"]["linear"]["enabled"]:
            grids.append({
                "C": self.space_definition["C"].build("GRID"),
                "kernel": ["linear"]
            })

        # Copy before disabling "linear", so that the space definition itself is left untouched
        non_linear_kernels = copy.deepcopy(kernel_dimension)
        non_linear_kernels.dimension_definition["values"]["linear"]["enabled"] = False

        gamma_flags = self.space_definition["gamma"].dimension_definition["values"]
        gammas = []
        if gamma_flags["custom"]["enabled"]:
            gammas.extend(self.space_definition["custom_gamma"].build("GRID"))
        for named_gamma in ("auto", "scale"):
            if gamma_flags[named_gamma]["enabled"]:
                gammas.append(named_gamma)

        grids.append({
            "gamma": gammas,
            "C": self.space_definition["C"].build("GRID"),
            "kernel": non_linear_kernels.build("GRID")
        })

        return grids

    def get_optimizer(self):
        return SVMOptimizer(self)


class SeasonalTrendLoessHyperparametersSpace(HyperparametersSpace):
    """Space rejecting the points whose "period" is not below "trend" and "low_pass"."""

    @staticmethod
    def enrich_hyperparam_point(point):
        """Return the point, or None when period >= trend or period >= low_pass (when present)."""
        # `period` is always defined
        if "trend" in point and point["period"] >= point["trend"]:
            message = "Invalid point: period ({}) >= trend ({})".format(point["period"], point["trend"])
            logger.info(message)
            return None

        if "low_pass" in point and point["period"] >= point["low_pass"]:
            message = "Invalid point: period ({}) >= low_pass ({})".format(point["period"], point["low_pass"])
            logger.info(message)
            return None

        return point

    def get_optimizer(self):
        raise NotImplementedError("Seasonal trend does not support Bayesian optimization")


class ETSHyperparametersSpace(HyperparametersSpace):
    """Space for ETS (Error, Trend, Seasonality) models, filtering out invalid combinations."""

    def __init__(self, space_definition):
        super(ETSHyperparametersSpace, self).__init__(space_definition)
        # When False, the model combinations listed in `enrich_hyperparam_point` are rejected
        self.include_unstable = False

    @classmethod
    def from_definition(
            cls,
            input_hp_space,
            hp_names_to_dimension_class=None,
            hp_names_to_dimension=None,
            constructor_args=None,
            include_unstable=False,
    ):
        """Same as the parent constructor, additionally setting `include_unstable` on the result."""
        instance = super(ETSHyperparametersSpace, cls).from_definition(
            input_hp_space, hp_names_to_dimension_class, hp_names_to_dimension, constructor_args
        )
        instance.include_unstable = include_unstable
        return instance

    def enrich_hyperparam_point(self, point):
        """Return the point, or None when the combination is filtered out."""
        # Create the ETS (Error, Trend, Seasonality) model string
        # for example:
        # "AMN" for Additive Error, Multiplicative Trend, No Seasonality
        # "AMdA" for Additive Error, Multiplicative Damped Trend, Additive Seasonality
        to_code = ETSHyperparametersSpace.param_to_str
        model_name = "".join(to_code(point[key]) for key in ("error", "trend", "damped_trend", "seasonal"))

        # Filter out the unstable ETS models, see the paper:
        # "Some nonlinear exponential smoothing models are unstable", Rob J Hyndman and Muhammad Akram
        # https://robjhyndman.com/papers/stability.pdf
        unstable_models = ("AMN", "AMA", "AMdN", "AMdA", "AMM", "AMdM", "MMA", "MMdA", "ANM", "AAM", "AAdM")
        if not self.include_unstable and model_name in unstable_models:
            logger.info("Invalid point: {}".format(model_name))
            return None

        # A damped trend requires a trend
        no_trend = point["trend"] == 'none'
        if no_trend and point["damped_trend"] != 'false':
            return None
        return point

    @staticmethod
    def param_to_str(param):
        """Return the ETS short code of a parameter value ("" for any value without a code)."""
        # Convert parameters to the ETS model representation standard
        # To be kept in sync with prediction-report.js's getETSShortName filter
        codes = {"add": "A", "mul": "M", "true": "d", "none": "N"}
        return codes.get(param, "")


class GluonTSTransformerHyperparametersSpace(HyperparametersSpace):
    """Space rejecting the points whose "num_heads" does not divide "model_dim"."""

    @staticmethod
    def enrich_hyperparam_point(point):
        """Return the point, or None when model_dim is not a multiple of num_heads."""
        heads_divide_dim = point["model_dim"] % point["num_heads"] == 0
        if heads_divide_dim:
            return point
        logger.info("Invalid point: num_heads ({}) does not divide model_dim ({})".format(point["num_heads"], point["model_dim"]))
        return None

    def get_optimizer(self):
        raise NotImplementedError("GluonTS Transformer does not support Bayesian optimization")


class TrainableModel(object):
    """Bundles a sklearn-compatible estimator with its hyperparameters space and
    the helpers used to configure it, clone it and build its `fit` parameters.
    """

    def __init__(self, estimator, hyperparameters_space=None,
                 supports_sample_weights=True):
        """Builds a new trainable model.

        :param estimator: A sklearn-compatible predictor.
        :type estimator: sklearn.base.BaseEstimator

        :param hyperparameters_space: The hyperparameters space (optional)
        :type hyperparameters_space: HyperparametersSpace | None

        :param supports_sample_weights:
            Whether sample weights may be forwarded to the estimator's `fit`.
        :type supports_sample_weights: bool
        """

        self._estimator = estimator
        self.hyperparameters_space = hyperparameters_space or HyperparametersSpace({})
        self._supports_sample_weights = supports_sample_weights

    @property
    def must_search(self):
        """A flag indicating whether the hyperparameters must be searched no
        matter the actual search space. This may be necessary when the model
        provides early stopping capabilities - in such use cases, searching is
        required even if the hyperparameter space contains a single point.

        :rtype: bool
        """
        return False

    @property
    def requires_evaluation_set(self):
        """A flag indicating whether the model requires an evaluation set to be
        provided as parameters to its `fit` function.

        :rtype: bool
        """
        return False

    @property
    def is_classifier(self):
        """Whether the wrapped estimator is a classifier, as told by sklearn.

        :rtype: bool
        """
        return is_classifier(self._estimator)

    def set_column_labels(self, column_labels):
        """Forwards the column labels to the wrapped estimator."""
        set_column_labels(self._estimator, column_labels)

    def set_class_weight(self, class_weight=None):
        """Sets the `class_weight` parameter of the estimator when it has one.

        Nothing happens when `class_weight` is None. When the estimator does not
        expose a `class_weight` parameter, a warning is logged and the weights
        are ignored.
        """
        if class_weight is None:
            return

        if "class_weight" in self._estimator.get_params():
            self._estimator.set_params(class_weight=class_weight)
        else:
            estimator_type = type(self._estimator)
            logger.warning("Class weights are not supported for estimator {}".format(estimator_type))

    def set_monotonic_cst(self, monotonic_cst=None):
        """Applies monotonic constraints to the estimator.

        Nothing happens when `monotonic_cst` is None or only contains zeros.
        Otherwise the constraints are set through `monotone_constraints` for
        DSS XGBoost and LightGBM estimators, and through `monotonic_cst` for
        any other estimator exposing that attribute.

        Failing to import xgboost or lightgbm is only logged, since those
        packages are optional. Errors raised while setting the parameter are
        NOT swallowed: they propagate to the caller.

        :param monotonic_cst: one constraint per feature (optional)
        :raises Exception: if the estimator does not support monotonic constraints
        """
        if monotonic_cst is None or np.all(np.array(monotonic_cst) == 0):
            return

        # Only the optional import is guarded: a failure of set_params must not
        # be reported as a missing module.
        try:
            from dataiku.doctor.prediction.dku_xgboost import DkuXGBClassifier, DkuXGBRegressor
        except Exception:
            logger.warning("Could not load xgboost module")
        else:
            if isinstance(self._estimator, (DkuXGBClassifier, DkuXGBRegressor)):
                self._estimator.set_params(monotone_constraints=tuple(monotonic_cst))
                return

        try:
            from lightgbm import LGBMRegressor, LGBMClassifier
        except Exception:
            logger.warning("Could not load lightgbm module")
        else:
            if isinstance(self._estimator, (LGBMRegressor, LGBMClassifier)):
                self._estimator.set_params(monotone_constraints=monotonic_cst)
                return

        # Scikit-learn algorithms
        if hasattr(self._estimator, "monotonic_cst"):
            self._estimator.set_params(monotonic_cst=monotonic_cst)
        else:
            estimator_type = type(self._estimator)
            raise Exception("Monotonic constraints are not supported for estimator {}".format(estimator_type))

    def clone_estimator(self, parameters=None):
        """Clones the estimator and assigns the parameters if any.

        :param parameters: The parameters of the estimator (optional)
        :type parameters: dict | None

        :return: A clone of the estimator, with the parameters
        :rtype: sklearn.base.BaseEstimator
        """
        cloned_estimator = clone(self._estimator)

        if parameters is not None:
            cloned_estimator.set_params(**parameters)

        return cloned_estimator

    def get_scorer(self, scoring=None):
        """Resolves `scoring` into a scorer for the wrapped estimator, using
        sklearn's `check_scoring`.
        """
        return check_scoring(self._estimator, scoring=scoring)

    def get_fit_parameters(self, sample_weight=None, X_eval=None, y_eval=None,
                           is_final_fit=False):
        """Gets the parameters to provide to the estimator's `fit` method. It
        includes the sample weights if the model supports it. Also, X_eval and
        y_eval are provided when the attribute requires_evaluation_set is set
        to True.

        :param sample_weight: The sample weights for the dataset (optional).
        :type sample_weight: pandas.core.series.Series | None

        :param X_eval: The features of the evaluation set (optional).
        :type X_eval: numpy.ndarray | None

        :param y_eval: The target of the evaluation set (optional).
        :type y_eval: numpy.ndarray | None

        :param is_final_fit:
            A flag indicating if the parameters are for the final call to the
            estimator's `fit` method, after the best hyperparameters have been
            found.
        :type is_final_fit: bool

        :rtype: dict
        """
        # This function must always return a new copy of the fit parameters,
        # because they can be updated in a concurrent environment. See ch36793
        # for more information.
        fit_parameters = {}

        if sample_weight is not None and self._supports_sample_weights:
            fit_parameters["sample_weight"] = np.array(sample_weight)

        return fit_parameters

    def get_extra_per_split_search_result_attributes(self, estimator):
        """Gets model-specific extra attributes for the search result of a
        hyperparameter space point, on a per split basis.

        :type estimator: sklearn.base.BaseEstimator
        :rtype: dict
        """
        return {}

    def compute_model_parameters(self, per_split_search_results):
        """Computes the model parameters from the search results of a
        hyperparameter space point (across all splits).

        :type per_split_search_results: list
        :rtype: dict
        """
        # The model parameters are the same across all splits.
        return per_split_search_results[0]["parameters"]


class TabularPredictionAlgorithm(object):
    """Base class of the prediction algorithms: builds the hyperparameter search
    settings and reports the parameters actually used by a fitted model.
    """
    algorithm = None

    def actual_params(self, ret, clf, fit_params):
        """
        Given a fitted model, outputs a dict of algorithm params to be stored back to DKU.
        Must be implemented by subclasses.

        :param ret: Pre-filled result dict (DKU names)
        :type ret: dict
        :param clf: Fitted model
        :param fit_params: Fit params
        :type fit_params: dict
        :return: Parameter dict (resolved & others)
        :rtype: dict
        """
        raise NotImplementedError('Not implemented')

    def get_search_settings(self, hyperparameter_search_params, trainable_model):
        """
        Builds the search settings (strategy, parallelism and limits) from the search params.

        :param hyperparameter_search_params: Hyperparameter search params (DKU names)
        :type hyperparameter_search_params: dict
        :param trainable_model: Model whose hyperparameters space is searched
        :type trainable_model: TrainableModel
        :rtype: SearchSettings
        :raises ValueError: if the search strategy is not GRID, RANDOM or BAYESIAN
        """
        def get_limit(key):
            # A limit of 0 means "no limit"
            value = hyperparameter_search_params.get(key, None)
            if value != 0:
                return value
            return None

        search_strategy_type = hyperparameter_search_params.get("strategy", "GRID")

        n_threads = safe_positive_int(hyperparameter_search_params.get("nJobs"))
        distributed = hyperparameter_search_params.get('distributed', False)
        n_containers = safe_positive_int(hyperparameter_search_params.get("nContainers"))

        n_iter = get_limit('nIter' if search_strategy_type == "GRID" else 'nIterRandom')
        timeout = get_limit('timeout')

        if search_strategy_type not in ("GRID", "RANDOM", "BAYESIAN"):
            raise ValueError("Unknown hyperparameter search strategy: {}".format(search_strategy_type))

        if search_strategy_type == "GRID":
            randomized = hyperparameter_search_params.get('randomized', False)
            search_strategy = GridSearchStrategy(trainable_model.hyperparameters_space, randomized)
        elif search_strategy_type == "RANDOM":
            search_strategy = RandomSearchStrategy(trainable_model.hyperparameters_space)
        else:
            search_strategy = BayesianSearchStrategy(trainable_model.hyperparameters_space)

        return SearchSettings(search_strategy, n_threads, distributed, n_containers, n_iter, timeout)

    def get_search_runner(self, *args, **kwargs):
        """Must be implemented by subclasses.

        :raises NotImplementedError: unconditionally
        """
        raise NotImplementedError("get_search_runner is not implemented")

    def get_actual_params(self, modeling_params, clf, fit_params):
        """
        Seeds the result dict with the algorithm name and the "skipExpensiveReports"
        flag of the modeling params, then delegates to `actual_params`.

        :type modeling_params: dict
        :param clf: Fitted model
        :type fit_params: dict
        :rtype: dict
        """
        base_params = {}
        base_params["algorithm"] = modeling_params["algorithm"]
        base_params["skipExpensiveReports"] = modeling_params["skipExpensiveReports"]
        return self.actual_params(base_params, clf, fit_params)


class ClassicalPredictionAlgorithm(TabularPredictionAlgorithm):
    """Prediction algorithm whose hyperparameter search is run by a ClassicalSearchRunner."""

    def model_from_params(self, input_hp_space, modeling_params, core_params):
        """
        Given the modeling & input params outputs a TrainableModel instance.
        Must be implemented by subclasses.

        :param input_hp_space: Input hyper-parameter space (DKU names)
        :type input_hp_space: dict
        :param modeling_params: Modeling params for current model
        :type modeling_params: dict
        :param core_params: ML task core params
        :type core_params: dict
        :return: trainable_model
        :rtype: TrainableModel
        """
        raise NotImplementedError('Not implemented')

    def get_search_runner(self, core_params, modeling_params, column_labels=None, model_folder_context=None,
                          target_map=None, unprocessed=None):
        """
        Builds the trainable model, its search settings, CV strategy and scorer,
        and wraps them into a search runner.

        :param core_params: ML task core params
        :type core_params: dict
        :param modeling_params: Modeling params for current model
        :type modeling_params: dict
        :param column_labels: Column labels, forwarded to the model and to the CV builder
        :param model_folder_context: Forwarded as-is to the search runner
        :param target_map: Forwarded to the scorer builder
        :param unprocessed: Forwarded to the scorer builder
        :rtype: ClassicalSearchRunner
        """
        logger.info("Create CLF from params: {} for algorithm {}".format(modeling_params, self.algorithm))

        trainable_model = self.model_from_params(
            get_input_hyperparameter_space(modeling_params, self.algorithm),
            modeling_params,
            core_params
        )
        trainable_model.set_column_labels(column_labels)

        search_params = modeling_params.get("grid_search_params", {})
        trainable_model.hyperparameters_space.set_random_state(search_params.get("seed", 0))
        search_settings = self.get_search_settings(search_params, trainable_model)

        prediction_type = core_params[doctor_constants.PREDICTION_TYPE]
        is_classification = prediction_type in {doctor_constants.BINARY_CLASSIFICATION, doctor_constants.MULTICLASS}
        cv = build_cv(modeling_params, column_labels, is_classification)
        grid_scorer = get_grid_scorer(modeling_params, prediction_type, target_map, unprocessed)

        evaluation_metric = modeling_params["metrics"]["evaluationMetric"]
        # The "greater is better" flag is only read from the custom metric definition;
        # every other metric gets True here.
        if modeling_params.get("metrics", {}).get("evaluationMetric") == "CUSTOM":
            custom_metric_gib = get_custom_evaluation_metric(modeling_params["metrics"])["greaterIsBetter"]
        else:
            custom_metric_gib = True

        return ClassicalSearchRunner(
            trainable_model=trainable_model,
            scoring=grid_scorer,
            cv=cv,
            search_settings=search_settings,
            evaluation_metric=evaluation_metric,
            model_folder_context=model_folder_context,
            custom_evaluation_metric_gib=custom_metric_gib,
        )

# Maps each algorithm name to the key under which the modeling params store its
# hyperparameter grid.
GRID_NAMES = {
    # Tree ensembles and decision trees
    "RANDOM_FOREST_REGRESSION": "rf_regressor_grid",
    "RANDOM_FOREST_CLASSIFICATION": "rf_classifier_grid",
    "EXTRA_TREES": "extra_trees_grid",
    "GBT_CLASSIFICATION": "gbt_classifier_grid",
    "GBT_REGRESSION": "gbt_regressor_grid",
    "DECISION_TREE_CLASSIFICATION": "dtc_classifier_grid",
    # NOTE(review): the regression variant points to the same grid key as the
    # classification one - confirm this is intentional before changing it
    "DECISION_TREE_REGRESSION": "dtc_classifier_grid",
    # Linear models, SVM, KNN
    "LOGISTIC_REGRESSION": "logit_grid",
    "SVM_REGRESSION": "svr_grid",
    "SVC_CLASSIFICATION": "svc_grid",
    "SGD_REGRESSION": "sgd_reg_grid",
    "SGD_CLASSIFICATION": "sgd_grid",
    "RIDGE_REGRESSION": "ridge_grid",
    "LASSO_REGRESSION": "lasso_grid",
    "KNN": "knn_grid",
    # Gradient boosting libraries
    "LIGHTGBM_CLASSIFICATION": "lightgbm_classification_grid",
    "LIGHTGBM_REGRESSION": "lightgbm_regression_grid",
    "XGBOOST_CLASSIFICATION": "xgboost_grid",
    "XGBOOST_REGRESSION": "xgboost_grid",
    # Others
    "LEASTSQUARE_REGRESSION": "least_squares_grid",
    "NEURAL_NETWORK": "neural_network_grid",
    "LARS": "lars_grid",
    "CUSTOM_PLUGIN": "plugin_python_grid",
    # Time series forecasting
    "TRIVIAL_IDENTITY_TIMESERIES": "trivial_identity_timeseries_grid",
    "SEASONAL_NAIVE": "seasonal_naive_timeseries_grid",
    "AUTO_ARIMA": "autoarima_timeseries_grid",
    "ARIMA": "arima_grid",
    "ETS": "ets_timeseries_grid",
    "SEASONAL_LOESS": "seasonal_loess_timeseries_grid",
    "PROPHET": "prophet_timeseries_grid",
    "GLUONTS_NPTS_FORECASTER": "gluonts_npts_timeseries_grid",
    "GLUONTS_TORCH_SIMPLE_FEEDFORWARD": "gluonts_torch_simple_feed_forward_timeseries_grid",
    "GLUONTS_TORCH_DEEPAR": "gluonts_torch_deepar_timeseries_grid",
    "GLUONTS_SIMPLE_FEEDFORWARD": "gluonts_simple_feed_forward_timeseries_grid",
    "GLUONTS_DEEPAR": "gluonts_deepar_timeseries_grid",
    "GLUONTS_TRANSFORMER": "gluonts_transformer_timeseries_grid",
    "GLUONTS_MQCNN": "gluonts_mqcnn_timeseries_grid",
    # Deep learning and causal models
    "DEEP_NEURAL_NETWORK_REGRESSION": "deep_neural_network_regression_grid",
    "DEEP_NEURAL_NETWORK_CLASSIFICATION": "deep_neural_network_classification_grid",
    "CAUSAL_FOREST": "causal_forest_grid",
}


class PredictionAlgorithmSparseSupport(AlgorithmSparseSupport):
    """Sparse (CSR) matrix support of the prediction algorithms.

    The algorithm name sets and the grid names are declared here for the base
    class; custom plugin algorithms are resolved from their own params.
    """

    ALGORITHMS_WITH_SETTABLE_CSR_SUPPORT = {
        'XGBOOST_CLASSIFICATION',
        'XGBOOST_REGRESSION',
        'LIGHTGBM_CLASSIFICATION',
        'LIGHTGBM_REGRESSION',
        'RANDOM_FOREST_CLASSIFICATION',
        'RANDOM_FOREST_REGRESSION',
        'GBT_CLASSIFICATION',
        'GBT_REGRESSION',
    }

    ALGORITHMS_WITH_NON_SETTABLE_CSR_SUPPORT = {
        'SGD_CLASSIFICATION',
        'SGD_REGRESSION',
        'RIDGE_REGRESSION',
        'LASSO_REGRESSION',
        'LOGISTIC_REGRESSION',
    }

    GRID_NAMES = GRID_NAMES

    def _algorithm_supports_csr(self):
        """Gets the CSR support of the current algorithm.

        :return: a CSRSupport value
        """
        if self.algorithm != "CUSTOM_PLUGIN":
            return super(PredictionAlgorithmSparseSupport, self)._algorithm_supports_csr()

        # For plugin algorithm, the plugin params tell whether it accepts sparse matrix or not
        accepts_sparse_matrix = self.modeling_params["plugin_python_grid"].get("acceptsSparseMatrix", False)
        return CSRSupport.REQUESTED if accepts_sparse_matrix else CSRSupport.DISABLED

    def supports_csr(self):
        """Tells whether the CSR support of the algorithm is anything but UNSUPPORTED.

        :rtype: bool
        """
        csr_support = self._algorithm_supports_csr()
        return csr_support != CSRSupport.UNSUPPORTED


class PredictionAlgorithmNaNSupport(object):
    """Tells whether the algorithm of a model can be trained on data containing
    NaN values, and which value stands for the unrecorded entries of a sparse
    matrix.
    """

    ALGORITHMS_WITH_NAN_SUPPORT = {
        'XGBOOST_CLASSIFICATION', 'XGBOOST_REGRESSION',
        'LIGHTGBM_CLASSIFICATION', 'LIGHTGBM_REGRESSION'
    }
    BEST_SPLITTER_ALGORITHMS = {'RANDOM_FOREST_CLASSIFICATION', 'RANDOM_FOREST_REGRESSION'}
    RANDOM_SPLITTER_ALGORITHMS = {'EXTRA_TREES'}
    DECISION_TREE_ALGORITHMS = {'DECISION_TREE_CLASSIFICATION', 'DECISION_TREE_REGRESSION'}

    ALGORITHMS_WITH_POTENTIAL_NAN_SUPPORT = ALGORITHMS_WITH_NAN_SUPPORT.union(BEST_SPLITTER_ALGORITHMS, RANDOM_SPLITTER_ALGORITHMS, DECISION_TREE_ALGORITHMS)

    FEATURE_REDUCTION_WITH_NAN_SUPPORT = {'NONE', 'RANDOM_FOREST'}

    ALGORITHMS_WITH_UNRECORDED_AS_NAN = {
        'XGBOOST_CLASSIFICATION', 'XGBOOST_REGRESSION'
    }

    def __init__(self, modeling_params, preprocessing_params=None):
        """
        :param modeling_params: Modeling params for current model, read for its "algorithm"
        :type modeling_params: dict
        :param preprocessing_params: Preprocessing params (optional), read for the feature selection method
        :type preprocessing_params: dict | None
        """
        self.modeling_params = modeling_params
        self.algorithm = self.modeling_params["algorithm"]
        self.preprocessing_params = preprocessing_params
        feature_reduction = preprocessing_params["feature_selection_params"]["method"] if preprocessing_params is not None else None
        self.supports_nan = self._supports_nan(feature_reduction)

    def _supports_nan(self, feature_reduction):
        """Tells whether NaN values are supported for the algorithm, the feature
        reduction method and the installed scikit-learn version.

        :param feature_reduction: feature reduction method, or None when there are no preprocessing params
        :rtype: bool
        """
        if feature_reduction is not None and feature_reduction not in self.FEATURE_REDUCTION_WITH_NAN_SUPPORT:
            logger.info("NaN support unavailable: incompatible feature reduction {}".format(feature_reduction))
            return False

        if self.algorithm not in self.ALGORITHMS_WITH_POTENTIAL_NAN_SUPPORT:
            logger.info("NaN support unavailable: incompatible algorithm {}".format(self.algorithm))
            return False

        if self.algorithm in self.RANDOM_SPLITTER_ALGORITHMS and not package_is_at_least(sklearn, "1.6"):
            logger.info("NaN support unavailable: incompatible algorithm {} on scikit-learn=={}".format(self.algorithm, sklearn.__version__))
            return False

        if self.algorithm in self.BEST_SPLITTER_ALGORITHMS and not package_is_at_least(sklearn, "1.4"):
            logger.info("NaN support unavailable: incompatible algorithm {} on scikit-learn=={}".format(self.algorithm, sklearn.__version__))
            return False

        if self.algorithm in self.DECISION_TREE_ALGORITHMS:
            random_splitter_enabled = self.modeling_params[GRID_NAMES[self.algorithm]]["splitter"]["values"]["random"]["enabled"]
            if random_splitter_enabled and not package_is_at_least(sklearn, "1.6"):
                logger.info("NaN support unavailable: incompatible algorithm {} (with random splitter) on scikit-learn=={}".format(self.algorithm, sklearn.__version__))
                return False
            if not random_splitter_enabled and not package_is_at_least(sklearn, "1.4"):
                logger.info("NaN support unavailable: incompatible algorithm {} on scikit-learn=={}".format(self.algorithm, sklearn.__version__))
                return False

        # Every incompatibility has been ruled out. This must also be reached by
        # the decision tree algorithms: they used to fall off the end of the
        # function and get None instead of True.
        logger.info("NaN support available: compatible algorithm {} and feature reduction {}".format(self.algorithm, feature_reduction))
        return True

    @property
    def unrecorded_value(self):
        # There are two different and incompatible ways of handling unrecorded entries in a sparse matrix:
        #   - treating unrecorded as 0 (as do scipy.sparse.csr_matrix.toarray, scikit-learn algorithms that
        #     support sparse inputs, LightGBM)
        #   - treating unrecorded as NaN (XGBoost)
        # The purpose of this property is to provide consistency between sparse and dense representation of
        # the data, for all ML algorithms being used to train the model
        if self.supports_nan and self.algorithm in self.ALGORITHMS_WITH_UNRECORDED_AS_NAN:
            return np.nan
        else:
            return 0.


class DefaultPredictionAlgorithmNaNSupport(object):
    """NaN support that is always disabled: NaN values are never supported and
    unrecorded entries are zeros.
    """

    @property
    def supports_nan(self):
        """Always False.

        :rtype: bool
        """
        return False

    @property
    def unrecorded_value(self):
        """Always zero.

        :rtype: float
        """
        return 0.0


def prepare_multiframe(multiframe, modeling_params):
    """Prepares the multiframe according to the sparse support of the algorithm
    described by the modeling params.

    :param multiframe: the multiframe to prepare
    :param modeling_params: Modeling params for current model
    :type modeling_params: dict
    :return: whatever prepare_multiframe_with_sparse_support returns
    """
    return prepare_multiframe_with_sparse_support(
        multiframe,
        PredictionAlgorithmSparseSupport(modeling_params)
    )


def scikit_model(modeling_params):
    """Builds the model defined by the code of a "Custom Python" model.

    The code found in modeling_params["custom_python"]["code"] is executed and
    must define a variable named `clf`.

    :param modeling_params: Modeling params for current model
    :type modeling_params: dict
    :return: the object assigned to `clf` by the custom code
    :raises Exception: if the code leaves `clf` undefined or set to None
    """
    namespace = {}
    # NOTE: this runs the user-provided code as-is
    exec(modeling_params["custom_python"]['code'], namespace)

    custom_clf = namespace.get('clf', None)
    if custom_clf is not None:
        logger.info("Using custom mode: %s" % custom_clf)
        return custom_clf

    raise Exception("No variable 'clf' defined in Custom Python model")


def build_cv(modeling_params, column_labels, is_classification):
    """Builds the cross-validation strategy of the hyperparameter search.

    The strategy is selected by the "mode" of modeling_params["grid_search_params"]:
      - no mode: the integer 3 (3-fold CV)
      - "SHUFFLE": a (stratified) shuffle split
      - "KFOLD": a k-fold splitter, optionally stratified and/or grouped
      - "TIME_SERIES_KFOLD": a time series split
      - "TIME_SERIES_SINGLE_SPLIT": a single sorted split
      - "CUSTOM": the `cv` variable defined by the user-provided code

    :param modeling_params: Modeling params for current model
    :type modeling_params: dict
    :param column_labels: Column labels, given to a custom CV object when it accepts them
    :param is_classification: Whether the task is a classification; stratification is only used in that case
    :type is_classification: bool
    :return: the CV strategy, or None when the mode is not one of the above
    :raises ValueError: if the params of the selected mode are invalid
    """
    grid_search_params = modeling_params["grid_search_params"]
    seed = grid_search_params.get("cvSeed", 1337)
    mode = grid_search_params.get("mode", None)

    if mode is None:
        logger.info("Using default CV strategy (3-fold CV, auto-stratified)")
        return 3

    if mode == "SHUFFLE":
        if not 1 > grid_search_params.get("splitRatio", -1) > 0:
            raise ValueError('Grid seach split ratio should be in interval ]0; 1[.')

        test_size = 1.0-grid_search_params["splitRatio"]  # TODO would be better to use the test ratio rather than train ratio as param but this is more consistent with DSS
        logger.info("test_size={}".format(test_size))
        if is_classification and grid_search_params.get("stratified", False):
            logger.info("Using stratified shuffle split with ratio %s" % grid_search_params["splitRatio"])
            return model_selection.StratifiedShuffleSplit(
                n_splits=grid_search_params["shuffleIterations"],
                test_size=test_size,
                random_state=seed
            )

        logger.info("Using shuffle split with ratio %s" % grid_search_params["splitRatio"])
        return model_selection.ShuffleSplit(
            n_splits=grid_search_params["shuffleIterations"],
            test_size=test_size,
            random_state=seed
        )

    if mode == "KFOLD":
        if not 2 <= grid_search_params.get("nFolds", 0) <= 1000:
            raise ValueError('Grid seach number of fold should be an integer in [2 ; 1000].')

        use_grouped = grid_search_params.get("grouped", False)
        use_stratified = grid_search_params.get("stratified", False)
        if use_stratified and not is_classification:
            logger.warning("Stratified k-fold CV can only be used with classification models. Falling back to non-stratified k-fold CV.")
            use_stratified = False

        if use_stratified and use_grouped:
            logger.info("Using stratified group K-Fold CV with k=%s" % grid_search_params["nFolds"])
            # NB: Don't use shuffle=True with StratifiedGroupKFold, as there is a bug in scikit-learn. See sc-131099 for details.
            logger.info('Setting shuffle=False for StratifiedGroupKFold splitter, ignoring random seed')
            return instantiate_stratified_group_kfold(
                n_splits=grid_search_params["nFolds"],
                shuffle=False
            )

        if use_stratified:
            logger.info("Using stratified K-Fold CV with k=%s" % grid_search_params["nFolds"])
            return model_selection.StratifiedKFold(
                n_splits=grid_search_params["nFolds"],
                shuffle=True,
                random_state=seed
            )

        if use_grouped:
            logger.info("Using group K-Fold CV with k=%s" % grid_search_params["nFolds"])
            return model_selection.GroupKFold(
                n_splits=grid_search_params["nFolds"],
            )

        logger.info("Using K-Fold CV with k=%s" % grid_search_params["nFolds"])
        return model_selection.KFold(
            n_splits=grid_search_params["nFolds"],
            shuffle=True,
            random_state=seed
        )

    if mode == "TIME_SERIES_KFOLD":
        if not 2 <= grid_search_params.get("nFolds", 0) <= 1000:
            raise ValueError('Grid seach number of fold should be an integer in [2 ; 1000].')

        # Logged through the module logger, like every other CV strategy
        logger.info("Using Time Series CV with k=%s" % grid_search_params["nFolds"])
        return model_selection.TimeSeriesSplit(n_splits=grid_search_params["nFolds"])

    if mode == "TIME_SERIES_SINGLE_SPLIT":
        split_ratio = grid_search_params.get("splitRatio", -1)
        if not 1 > split_ratio > 0:
            raise ValueError('Grid seach split ratio should be in interval ]0; 1[.')
        test_size = 1-split_ratio
        return DKUSortedSingleSplit(test_size=test_size)

    if mode == "CUSTOM":
        if not len(grid_search_params.get("code", "").strip()) > 0:
            raise ValueError('Custom grid search cross-validation is not specified')
        code = grid_search_params["code"]
        ctx = {}
        # NOTE: this runs the user-provided code as-is
        exec(code, ctx)

        cv = ctx.get('cv', None)

        if cv is None:
            raise ValueError("No variable 'cv' defined in Custom grid search code")
        logger.info("Using custom CV: %s" % cv)

        # Column labels are optional for custom CV objects: best effort, but
        # without intercepting SystemExit / KeyboardInterrupt
        try:
            cv.set_column_labels(column_labels)
        except Exception:
            logger.info("Custom grid search code does not support column labels")

        return cv

    # Any other mode: no CV strategy is built
    return None


def get_groups_for_hp_search_cv(modeling_params, transformed_train):
    """
    Extracts the group labels used by group k-fold CV during the HP search.

    Empty values of the group column form a group of their own: max + 1 for a
    numerical column, the default fill value otherwise.

    :param modeling_params: Modeling params for current model
    :type modeling_params: dict
    :param transformed_train: Train data, whose "UNPROCESSED" entry holds the group column
    :return: The groups column required by the split function of the cv for HP search. Or None if group k-fold was not selected.
    :rtype: pd.Series | None
    :raises ValueError: if the group column is not set, contains no values, or has fewer groups than folds
    """
    grid_search_params = modeling_params["grid_search_params"]

    group_kfold_selected = grid_search_params.get("mode") == "KFOLD" and grid_search_params.get("grouped", False)
    if not group_kfold_selected:
        return None

    if "groupColumnName" not in grid_search_params:
        raise ValueError("Group k-fold CV requires a group column to be set")

    group_labels = transformed_train["UNPROCESSED"][grid_search_params["groupColumnName"]]

    if group_labels.hasnans:
        if pd.api.types.is_numeric_dtype(group_labels):
            # Numerical column: use a value that is not already in the column, i.e. max(column values) + 1
            known_labels = group_labels.dropna()
            if known_labels.empty:
                raise ValueError("Group k-fold CV column contains no values")
            na_group_label = known_labels.max() + 1
        else:
            na_group_label = doctor_constants.FILL_NA_VALUE
        group_labels = group_labels.fillna(na_group_label)
        logger.info("Empty values found in group column for group k-fold CV, replacing with '{new_group_label}'".format(new_group_label=na_group_label))

    num_groups = group_labels.nunique()
    num_folds = grid_search_params["nFolds"]
    if num_groups < num_folds:
        raise ValueError("Cannot have more folds ({numFolds}) than groups ({numGroups}) for group k-fold CV".format(
            numFolds=num_folds, numGroups=num_groups))

    return group_labels


def train_test_split(X, y, test_size, random_state):
    """Splits X and y with sklearn's `model_selection.train_test_split`.

    :param X: the features
    :param y: the target
    :param test_size: forwarded as `test_size`
    :param random_state: forwarded as `random_state`
    :return: what sklearn's train_test_split returns for (X, y)
    """
    split_options = {"test_size": test_size, "random_state": random_state}
    return model_selection.train_test_split(X, y, **split_options)


def dump_pretrain_info(clf, train_X, train_y, weight=None, calibration_on_train=False):
    """Logs a description of the model and of its training data before fitting.

    :param clf: the model about to be fitted
    :param train_X: the train features (class and shape are logged)
    :param train_y: the train target (shape is logged)
    :param weight: the sample weights (optional, shape is logged when given)
    :param calibration_on_train: whether to mention that calibration used a sub-sample of the train data
    :type calibration_on_train: bool
    """
    log_line = logger.info

    log_line("Fitting model:")
    log_line("  Model is: %s" % clf)

    train_X_class = str(train_X.__class__)
    log_line("  train_X class: %s" % train_X_class)

    train_X_shape = str(train_X.shape)
    log_line("  train_X shape: %s" % train_X_shape)

    train_y_shape = str(train_y.shape)
    log_line("  train_y shape: %s" % train_y_shape)

    if weight is not None:
        weight_shape = str(weight.shape)
        log_line("  train_weight shape: %s" % weight_shape)

    if calibration_on_train:
        log_line("  calibration: a dedicated sub-sample of the train data has been used")


def get_initial_intrinsic_perf_data(train_X, is_sparse):
    """Describes the input of the model: dimensions, sparsity and memory footprint.

    For a sparse input the memory is the sum of the sizes of its `data`,
    `indptr` and `indices` arrays; otherwise it is the size of the array itself.

    :param train_X: the train features
    :param is_sparse: whether train_X is a sparse matrix
    :type is_sparse: bool
    :return: dict with keys "modelInputNRows", "modelInputNCols", "modelInputIsSparse" and "modelInputMemory"
    :rtype: dict
    """
    n_rows = train_X.shape[0]
    n_cols = train_X.shape[1]

    if is_sparse:
        memory = sum(part.nbytes for part in (train_X.data, train_X.indptr, train_X.indices))
    else:
        memory = train_X.nbytes

    return {
        "modelInputNRows": n_rows,
        "modelInputNCols": n_cols,
        "modelInputIsSparse": is_sparse,
        "modelInputMemory": memory,
    }


def get_grid_scorer(modeling_params, prediction_type, target_map=None, unprocessed=None, custom_make_scorer=None):
    """Gets the scorer of the evaluation metric selected in the modeling params.

    All arguments are forwarded to `get_grid_scorers`, and the scorer registered
    under modeling_params["metrics"]["evaluationMetric"] is returned.

    :raises KeyError: if no scorer is available for the evaluation metric
    """
    evaluation_metric = modeling_params["metrics"]["evaluationMetric"]
    available_scorers = get_grid_scorers(
        modeling_params,
        prediction_type,
        target_map,
        unprocessed,
        custom_make_scorer
    )
    return available_scorers[evaluation_metric]

def get_grid_scorers(modeling_params, prediction_type, target_map=None, unprocessed=None, custom_make_scorer=None):
    """Returns the scorers available for the prediction type, by metric name.

    A scorer is built for every known metric, plus a "CUSTOM" one when the
    evaluation metric of the modeling params is "CUSTOM". Only the metrics
    listed for the prediction type are kept in the result.

    :param modeling_params: Modeling params for current model, read for its "metrics"
    :type modeling_params: dict
    :param prediction_type: One of the binary classification, multiclass or regression constants
    :param target_map: Forwarded to the probability-based scorers (optional)
    :param unprocessed: Forwarded to the custom metric score function (optional)
    :param custom_make_scorer: Replacement for the default `make_scorer` (optional)
    :return: dict of metric name -> scorer
    :rtype: dict
    :raises KeyError: if prediction_type is not one of the three supported types
    """
    # With a custom scorer factory, the probability-based scorers are built with remap=False
    if custom_make_scorer is not None:
        make_scorer_func = custom_make_scorer
        remap = False
    else:
        make_scorer_func = make_scorer
        remap = True

    # Class averaging method, used by the precision / recall / F1 / ROC AUC / average precision scorers
    if prediction_type == doctor_constants.MULTICLASS:
        average = get_multiclass_metrics_averaging_method(modeling_params["metrics"])
        logger.info("Computing multiclass metrics with \"{}\" class averaging method".format(average))
    elif prediction_type == doctor_constants.BINARY_CLASSIFICATION:
        average = "binary"
    else:
        average = None

    # NB: all the scorers are built here, whatever the prediction type; the irrelevant
    # ones are filtered out at the end of the function
    scorer_map = {
        "ACCURACY":  make_scorer_func(accuracy_score, greater_is_better=True),
        "PRECISION": make_scorer_func(lambda y_true, y_pred, sample_weight=None: precision_score(y_true, y_pred, average=average, sample_weight=sample_weight),
                                      greater_is_better=True),
        "RECALL": make_scorer_func(lambda y_true, y_pred, sample_weight=None: recall_score(y_true, y_pred, average=average, sample_weight=sample_weight),
                                   greater_is_better=True),
        "F1": make_scorer_func(lambda y_true, y_pred, sample_weight=None: f1_score(y_true, y_pred, average=average, sample_weight=sample_weight),
                               greater_is_better=True),
        "LOG_LOSS": _dku_make_scorer_proba(log_loss, prediction_type, target_map, make_scorer_func,
                                           greater_is_better=False, remap=remap),
        "ROC_AUC": _dku_make_scorer_proba(mroc_auc_score, prediction_type, target_map, make_scorer_func,
                                          greater_is_better=True, remap=remap, average=average),
        "AVERAGE_PRECISION": _dku_make_scorer_proba(m_average_precision_score, prediction_type, target_map, make_scorer_func,
                                         greater_is_better=True, remap=remap, average=average),

        "COST_MATRIX": make_scorer_func(make_cost_matrix_score(modeling_params["metrics"]),
                                        greater_is_better=True),

        "CUMULATIVE_LIFT": _dku_make_scorer_proba(make_lift_score(modeling_params["metrics"]), prediction_type,
                                                  target_map, make_scorer_func, greater_is_better=True, remap=remap),

        "EVS": make_scorer_func(explained_variance_score, greater_is_better=True),
        "MAPE": make_scorer_func(mean_absolute_percentage_error, greater_is_better=False),
        "MAE": make_scorer_func(mean_absolute_error, greater_is_better=False),
        "MSE": make_scorer_func(mean_squared_error, greater_is_better=False),
        "RMSE": make_scorer_func(rmse_score, greater_is_better=False),
        "RMSLE": make_scorer_func(rmsle_score, greater_is_better=False),
        "R2": make_scorer_func(r2_score, greater_is_better=True),
    }

    # The custom scorer is only built when the custom metric is the evaluation metric
    metric_params = modeling_params["metrics"]
    if metric_params['evaluationMetric'] == "CUSTOM":
        custom_evaluation_metric = get_custom_evaluation_metric(modeling_params["metrics"])
        if custom_make_scorer is not None:
            custom_scorefunc = get_custom_evaluation_metric_scorefunc(custom_evaluation_metric, unprocessed)
            scorer_map["CUSTOM"] = make_scorer_func(custom_scorefunc, greater_is_better=custom_evaluation_metric["greaterIsBetter"],
                                                    needs_proba=custom_evaluation_metric["needsProbability"])
        else:
            # scikit-learn will not do much with so-called "scorer" object when there are functions, so we 'cheat' by passing
            # a function that doesn't conform to the pure spec: it adds a 'indices' argument that the grid searcher will
            # notice and use to send the sub-index of the part being scored
            def expose_indices_wrapper(estimator, Y, y, sample_weight=None, indices=None):
                # NB: the second argument (named Y) is handed to the scorer in the position of the features
                # The score function is rebuilt on each call because it depends on the indices being scored
                custom_scorefunc = get_custom_evaluation_metric_scorefunc(custom_evaluation_metric, unprocessed, indices)
                greater_is_better = custom_evaluation_metric["greaterIsBetter"]
                needs_proba = custom_evaluation_metric["needsProbability"]

                if prediction_type == doctor_constants.BINARY_CLASSIFICATION and needs_proba:
                    # In binary classification with needs_proba == True, we use a custom scorer that does not truncate
                    # y_pred to its 2nd column (as sklearn does by default in _ProbaScorer). This is to keep consistency
                    # with `dataiku.doctor.prediction.classification_scoring.BinaryClassificationModelScorer.score
                    scorer = _dku_make_scorer_proba_binary(custom_scorefunc, target_map, greater_is_better=greater_is_better)
                else:
                    scorer = make_scorer_func(custom_scorefunc, greater_is_better=greater_is_better,
                                              needs_proba=needs_proba)

                return scorer(estimator, Y, y, sample_weight=sample_weight)

            scorer_map["CUSTOM"] = expose_indices_wrapper


    # Names of the metrics available for each prediction type
    scorers_per_task = {
        doctor_constants.BINARY_CLASSIFICATION: list(BINARY_METRICS_NAME_TO_FIELD_NAME.keys()),
        doctor_constants.MULTICLASS: list(MULTICLASS_METRICS_NAME_TO_FIELD_NAME.keys()),
        doctor_constants.REGRESSION: list(REGRESSION_METRICS_NAME_TO_FIELD_NAME.keys())
    }

    # Only keep the scorers of the metrics available for the prediction type
    return {k: v for k, v in scorer_map.items() if k in  scorers_per_task[prediction_type] }


def get_multiclass_metrics_averaging_method(metrics_params):
    """
    Pick the averaging keyword used for multiclass metrics.

    :param metrics_params: dict of metric settings; only its "classAveragingMethod" entry is read
    :return: "weighted" when the configured method equals doctor_constants.CLASS_AVERAGING_WEIGHTED,
             "macro" in every other case (including when the entry is absent)
    """
    configured_method = metrics_params.get("classAveragingMethod")
    return "weighted" if configured_method == doctor_constants.CLASS_AVERAGING_WEIGHTED else "macro"


def _dku_make_scorer_proba_binary(score_func, target_map, greater_is_better=True, **kwargs):
    """
    Build the search scorer used for binary classification metrics that need probabilities.

    The returned scorer passes the complete output of `predict_proba` (both columns) to `score_func`,
    whereas sklearn's default probability scorer keeps a single column, which breaks scoring functions
    expecting a (N, 2) `y_pred`.

    :param score_func: function that returns the score as a function of (y, y_pred, sample_weight, **kwargs)
    :param target_map: dict of source target value -> mapped class value; only used to log the order of
                       the probability columns
    :param greater_is_better: True if higher score means better model; when False the score is negated
    :param kwargs: Optional keyword arguments forwarded to `score_func`
    :return: the scoring function with arguments (clf, X, y, sample_weight); it carries a `_sign`
             attribute equal to 1 or -1
    """
    if greater_is_better:
        sign = 1
    else:
        sign = -1

    def score(clf, X, y, sample_weight=None):
        probas = clf.predict_proba(X)
        assert probas.shape[1] == 2, "Ended up with less than two-classes. y_pred.shape: {}".format(probas.shape)

        # Work out which source value each probability column stands for, purely for logging
        known_classes = list(clf.classes_)
        columns_order = [None] * len(known_classes)
        for source_value, mapped_value in target_map.items():
            position = known_classes.index(mapped_value)
            columns_order[position] = safe_unicode_str(source_value)
        logger.info("Computing custom metric, order of y_pred columns is the following: %s", columns_order)

        # sample_weight is only forwarded when provided, so score functions without that argument still work
        if sample_weight is None:
            return sign * score_func(y, probas, **kwargs)
        return sign * score_func(y, probas, sample_weight=sample_weight, **kwargs)

    score._sign = sign
    return score


def _dku_make_scorer_proba(score_func, prediction_type, target_map, make_scorer_func, greater_is_better=True,
                           remap=True, **kwargs):
    if not remap:
        return make_scorer_func(score_func, needs_proba=True, greater_is_better=greater_is_better, **kwargs)

    else:
        # When scoring with probas for a classification problem, it is possible that not all classes
        # are found in the training dataset. Thus the prediction may not contain probas for all classes (the missing
        # ones are 0). Therefore, we must remap the predictions to have the appropriate dimension, prior to scoring.
        sign = 1 if greater_is_better else -1

        def score_with_remap(clf, X, y, sample_weight=None):

            y_pred_raw = clf.predict_proba(X)

            # Remapping predictions with actual classes
            (nb_rows, nb_present_classes) = y_pred_raw.shape
            y_pred = np.zeros((nb_rows, len(target_map)))
            for j in range(nb_present_classes):
                actual_class_id = clf.classes_[j]
                y_pred[:, actual_class_id] = y_pred_raw[:, j]

            if sample_weight is not None:
                return sign * score_func(y, y_pred, sample_weight=sample_weight, **kwargs)

            return sign * score_func(y, y_pred, **kwargs)

        score_with_remap._sign = sign
        return score_with_remap


def weighted_quantile(values, weights, target_rate, cumsum_weights=None):
    """
    Return the value located at a given fraction of the total weight.

    NB: Expects values to be a numpy array sorted in increasing order.

    :param values: sorted values
    :param weights: weight of each value; only read when `cumsum_weights` is not provided
    :param target_rate: fraction of the total weight to reach
    :param cumsum_weights: optional precomputed cumulative sum of the weights, meant to avoid computing
                           the same cumulative sum several times
    :return: the first value whose cumulative weight reaches `target_rate` times the total weight, the last
             value when the target exceeds the total weight, and NaN when `values` is empty or the lookup
             fails
    """
    if len(values) == 0:
        return np.nan
    if cumsum_weights is None:
        cumsum_weights = np.cumsum(weights)
    sum_weights = cumsum_weights[-1]
    target = target_rate * sum_weights
    i = np.searchsorted(cumsum_weights, target)
    try:
        res = values[-1] if i == len(values) else values[i]
    except Exception:
        # Best effort: a failed lookup (e.g. inconsistent lengths) yields NaN. Unlike a bare `except:`,
        # this lets KeyboardInterrupt and SystemExit propagate.
        res = np.nan
    return res


def weighted_quantiles(values, weights, quantiles):
    """
    Vectorized weighted quantile lookup.

    :param values: numpy array of values; presumably sorted in increasing order like in
                   `weighted_quantile` -- TODO confirm against callers
    :param weights: weight of each value
    :param quantiles: fraction(s) of the total weight to reach (array-like or scalar)
    :return: for each quantile, the first value whose cumulative weight reaches that fraction of the total
             weight (the last value when the target exceeds the total weight); an all-NaN array with the
             shape of `quantiles` when `values` is empty
    """
    if len(values) == 0:
        # np.array(np.nan, shape) would interpret the shape as a dtype and raise; np.full builds the
        # intended NaN-filled result
        return np.full(np.shape(quantiles), np.nan)
    cumsum_weights = np.cumsum(weights)
    targets = np.asarray(quantiles) * cumsum_weights[-1]
    # searchsorted returns len(values) for targets above the total weight: clamp onto the last value
    indices = np.minimum(np.searchsorted(cumsum_weights, targets), len(values) - 1)
    return values[indices]


def make_lift_score(metrics_params):
    """
    Build a lift scoring function.

    The lift is the rate of rows with `actual == 1` among the rows with the highest probability, divided by
    the same rate over all rows. The share of rows (or of cumulated weight, when sample weights are given)
    to consider is read from `metrics_params["liftPoint"]`.

    :param metrics_params: dict of metric settings; only "liftPoint" is read, at scoring time
    :return: function (y_true, probas, sample_weight=None) -> lift. `probas` is indexed as `probas[:, 1]`,
             so it must be 2-dimensional with at least two columns. A ZeroDivisionError is raised when no
             row has `actual == 1`.
    """
    def score(y_true, probas, sample_weight=None):
        if sample_weight is not None:
            df = pd.DataFrame({"actual": y_true, "proba": probas[:, 1], "sample_weight": sample_weight})
            df.sort_values(by=["proba"], ascending=False, inplace=True)
            # count -> sum of weights
            global_true_weight_sum = np.dot((df["actual"] == 1).values, df["sample_weight"].values)
            cumsum_weights = np.cumsum(df["sample_weight"].values)
            sum_weights = cumsum_weights[-1]
            logger.info("Total true rate (weighted) = %s / %s", global_true_weight_sum, sum_weights)
            global_true_rate = float(global_true_weight_sum) / float(sum_weights)

            # Rows are taken while their cumulated weight stays below the targeted share of total weight
            part_sum_weights_target = metrics_params["liftPoint"] * sum_weights
            nb_rows_to_consider = np.searchsorted(cumsum_weights, part_sum_weights_target)
            logger.info("Computing lift on first %s lines (%s cumulated weight)",
                        nb_rows_to_consider, part_sum_weights_target)

            df_considered = df.iloc[:nb_rows_to_consider]
            considered_true = np.dot((df_considered["actual"] == 1).values, df_considered["sample_weight"].values)
            # Logged as numerator / denominator, like the total true rate above
            logger.info("True rate on considered : %s / %s", considered_true, part_sum_weights_target)
            considered_true_rate = float(considered_true) / float(part_sum_weights_target)
        else:
            df = pd.DataFrame({"actual": y_true, "proba": probas[:, 1]})
            df.sort_values(by=["proba"], ascending=False, inplace=True)

            global_true_cnt = (df["actual"] == 1).sum()
            logger.info("Total true rate = %s / %s", global_true_cnt, df.shape[0])
            global_true_rate = float(global_true_cnt) / float(df.shape[0])

            # putting at least one row to consider to prevent from failing
            nb_rows_to_consider = max(int(metrics_params["liftPoint"] * float(df.shape[0])), 1)
            logger.info("Computing lift on first %s rows", nb_rows_to_consider)

            df_considered = df.iloc[:nb_rows_to_consider]
            considered_true = (df_considered["actual"] == 1).sum()
            # Logged as numerator / denominator, like the total true rate above
            logger.info("True rate on considered : %s / %s", considered_true, df_considered.shape[0])
            considered_true_rate = float(considered_true) / float(df_considered.shape[0])

        lift = considered_true_rate / global_true_rate
        logger.info("Lift = %f", lift)
        return lift
    return score

def compute_cost_matrix_score(tp, tn, fp, fn, cost_matrix_weights):
    """
    Combine confusion-matrix counts with their configured gains.

    :param tp: true positive count
    :param tn: true negative count
    :param fp: false positive count
    :param fn: false negative count
    :param cost_matrix_weights: dict with the keys "tpGain", "tnGain", "fpGain" and "fnGain"
    :return: sum of each count multiplied by its gain
    """
    tp_part = tp * cost_matrix_weights["tpGain"]
    tn_part = tn * cost_matrix_weights["tnGain"]
    fp_part = fp * cost_matrix_weights["fpGain"]
    fn_part = fn * cost_matrix_weights["fnGain"]
    return tp_part + tn_part + fp_part + fn_part


def make_cost_matrix_score(metrics_params):
    """
    Build a scoring function that values a confusion matrix with the configured gains.

    :param metrics_params: dict of metric settings; "costMatrixWeights" is read at scoring time
    :return: function (y_true, y_pred, sample_weight=None) -> cost matrix score
    """
    def score(y_true, y_pred, sample_weight=None):
        conf = confusion_matrix(y_true, y_pred, sample_weight=sample_weight)
        # The matrix is indexed as conf[actual, predicted], the second class being the positive one
        true_pos = conf[1, 1]
        true_neg = conf[0, 0]
        false_pos = conf[0, 1]
        false_neg = conf[1, 0]
        return compute_cost_matrix_score(true_pos, true_neg, false_pos, false_neg,
                                         metrics_params["costMatrixWeights"])
    return score


def get_threshold_optim_function(metric_params):
    """
    Look up the metric used to optimize the classification threshold.

    :param metric_params: dict of metric settings; "thresholdOptimizationMetric" selects the metric, among
                          "ACCURACY", "PRECISION", "F1" and "COST_MATRIX"
    :return: tuple (score function taking (y_true, y_pred), greater_is_better flag)
    """
    metric_table = dict(
        ACCURACY=(accuracy_score, True),
        PRECISION=(precision_score, True),
        F1=(f1_score, True),
        COST_MATRIX=(make_cost_matrix_score(metric_params), True),
    )
    selected_metric = metric_params["thresholdOptimizationMetric"]
    return metric_table[selected_metric]


def get_selection_mode(max_features):
    """
    Translate a `max_features` value into its selection mode.

    :param max_features: an int, a float, or any other value
    :return: "number" for an int, "prop" for a float, otherwise `max_features` itself
    """
    for numeric_type, mode in ((int, "number"), (float, "prop")):
        if isinstance(max_features, numeric_type):
            return mode
    return max_features


def simple_numeric_explicit_dimension(value):
    """
    Wrap a single number into a numerical hyperparameter dimension.

    :param value: the only value of the dimension
    :return: a NumericalHyperparameterDimension in "EXPLICIT" grid and random modes, with "LINEAR" range
             scaling, listing only `value`
    """
    dimension_spec = {
        "values": [value],
        "gridMode": "EXPLICIT",
        "randomMode": "EXPLICIT",
        "range": {"scaling": "LINEAR"},
    }
    return NumericalHyperparameterDimension(dimension_spec)


def simple_categorical_dimension(value):
    """
    Wrap a single category into a categorical hyperparameter dimension.

    :param value: the only category of the dimension, marked as enabled
    :return: a CategoricalHyperparameterDimension
    """
    enabled_values = {value: {"enabled": True}}
    return CategoricalHyperparameterDimension({"values": enabled_values})


def get_max_features_dimension(ingrid):
    """
    Build the hyperparameter dimension matching the max features selection mode.

    :param ingrid: dict with a 'selection_mode' entry, plus 'max_features' (mode "number") or
                   'max_feature_prop' (mode "prop")
    :return: a categorical dimension for "auto", "sqrt" and "log2", an integer dimension for "number",
             a float dimension for "prop", and None for any other mode
    """
    mode = ingrid['selection_mode']
    if mode in ("auto", "sqrt", "log2"):
        return simple_categorical_dimension(mode)
    if mode == "number":
        return IntegerHyperparameterDimension(ingrid['max_features'])
    if mode == "prop":
        return FloatHyperparameterDimension(ingrid['max_feature_prop'])
    return None


def safe_positive_int(x):
    """
    Return `x` when it is a strictly positive int, and -1 for anything else.
    """
    if isinstance(x, int) and x > 0:
        return x
    return -1


def replace_value_by_empty(element, value=0):
    """
    Replace every occurrence of `value` by None.

    :param element: a dict (its values are replaced in place and the same dict is returned), a list
                    (a new list is returned) or a single value
    :param value: the value to blank out, compared with `==`
    :return: the element with matching entries replaced by None
    """
    if isinstance(element, dict):
        for key, current in element.items():
            element[key] = None if current == value else current
        return element

    if isinstance(element, list):
        return [None if item == value else item for item in element]

    if element == value:
        return None
    return element


def safe_del(dic, key):
    """
    Delete `key` from `dic` when present; do nothing otherwise.
    """
    if key not in dic:
        return
    del dic[key]


def pivot_property_to_list(o, proplist):
    """
    List the properties of `o` that are set to True.

    :param o: object exposing `get(prop)`, e.g. a dict
    :param proplist: property names to check, in order
    :return: the names from `proplist` whose value in `o` is exactly True, order preserved
    """
    return [prop for prop in proplist if o.get(prop) is True]


def create_categorical_dimension(l):
    """
    Build a categorical hyperparameter dimension in which every given category is enabled.

    :param l: iterable of categories
    :return: a CategoricalHyperparameterDimension
    """
    enabled_values = {}
    for category in l:
        enabled_values[category] = {"enabled": True}
    return CategoricalHyperparameterDimension({"values": enabled_values})


def _identity(value=None,col=None):
    return value

def needs_hyperparameter_search(modeling_params):
    """
    Tell whether training requires a hyperparameter search.

    :param modeling_params: pre-train modeling params; reads 'gridLength' (defaults to 1) and 'algorithm'
    :return: True when 'gridLength' is not 1; otherwise the (truthy or falsy) early stopping flag of the
             algorithm's hyperparameter space
    """
    if modeling_params.get('gridLength', 1) != 1:
        return True

    # The space is resolved once and only when needed, instead of once per flag
    hp_space = get_input_hyperparameter_space(modeling_params, modeling_params['algorithm'])
    # Yes it's not the same param name for xgboost and lightgbm -__-
    return hp_space.get('enable_early_stopping', False) or hp_space.get('early_stopping', False)

def get_input_hyperparameter_space(modeling_params, algorithm):
    """
    Returns the grid object from the pre-train modeling params for a given algorithm

    :param modeling_params: pre-train modeling params, holding the grid under the name that GRID_NAMES
                            associates with the algorithm
    :param algorithm: algorithm identifier
    :return: an empty dict for 'SCIKIT_MODEL'; otherwise the grid, converted with `gbt_skcompat_hp_space`
             for "GBT_*" algorithms and `sgd_skcompat_hp_space` for "SGD_*" algorithms
    :raises Exception: when the algorithm is not in GRID_NAMES or when its grid is missing
    """
    if algorithm == 'SCIKIT_MODEL':
        return {}
    if algorithm not in GRID_NAMES:
        raise Exception("Algorithm not available in Python: %s" % algorithm)

    grid_key = GRID_NAMES[algorithm]
    if grid_key not in modeling_params:
        raise Exception("Unexpected: no grid for %s" % algorithm)

    if algorithm.startswith("GBT_"):
        return gbt_skcompat_hp_space(modeling_params.get(grid_key))
    if algorithm.startswith("SGD_"):
        return sgd_skcompat_hp_space(modeling_params.get(grid_key))
    return modeling_params.get(grid_key)


def check_classical_prediction_type(prediction_type):
    """
    Ensure the prediction type is binary classification, multiclass or regression.

    :param prediction_type: prediction type to validate
    :raises ValueError: when the prediction type is none of the three supported ones
    """
    supported_types = {
        doctor_constants.BINARY_CLASSIFICATION,
        doctor_constants.MULTICLASS,
        doctor_constants.REGRESSION,
    }
    if prediction_type in supported_types:
        return
    raise ValueError("Unsupported prediction type: " + prediction_type)


def regridify_optimized_params(optimized_params, modeling_params):
    """
    Ask the backend for the hyperparameters space equivalent to the optimized parameters. Typically used to
    re-train a model with a 1-point hyperparameter space with the optimal params.

    Both arguments are sent JSON-serialized to the "ml/prediction/regridify-to-pretrain" backend call.

    :type optimized_params: dict
    :type modeling_params: dict
    :rtype: dict
    """
    post_train_json = dkujson.dumps(optimized_params)
    logger.info("Regridifying post-train params: %s" % post_train_json)

    payload = {
        "preTrain": dkujson.dumps(modeling_params),
        "postTrain": post_train_json
    }
    unary_grid = intercom.backend_json_call("ml/prediction/regridify-to-pretrain", payload)
    logger.info("Using unary grid params: %s" % dkujson.dumps(unary_grid))

    return unary_grid


def get_monotonic_cst(preprocessing_params, train_X):
    """
    Compute the monotonic constraint of each training column.

    :param preprocessing_params: dict whose "per_feature" entry maps feature names to their params; for
                                 features with role "INPUT" and type "NUMERIC", "monotonic" is read
    :param train_X: object exposing a `columns()` method returning the training column names
    :return: list with one entry per column: 1 for an "INCREASE" constraint, -1 for "DECREASE", else 0
    """
    direction_by_feature = {}
    for feature, params in preprocessing_params["per_feature"].items():
        if params["role"] != "INPUT" or params["type"] != "NUMERIC":
            continue
        monotonic = params["monotonic"]
        if monotonic == "INCREASE":
            direction_by_feature[feature] = 1
        elif monotonic == "DECREASE":
            direction_by_feature[feature] = -1

    # Columns without a constrained feature of the same name get 0
    return [direction_by_feature.get(column, 0) for column in train_X.columns()]