import logging
import traceback

import numpy as np
import pandas as pd
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Lasso
from sklearn.linear_model import LassoCV
from sklearn.linear_model import LassoLarsIC
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import RidgeCV
from sklearn.linear_model import SGDRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor

from dataiku.doctor.plugins.common_algorithm import PluginPredictionAlgorithm
from dataiku.doctor.prediction.common import CategoricalHyperparameterDimension
from dataiku.doctor.prediction.common import ClassicalPredictionAlgorithm
from dataiku.doctor.prediction.common import FloatHyperparameterDimension
from dataiku.doctor.prediction.common import HyperparametersSpace
from dataiku.doctor.prediction.common import IntegerHyperparameterDimension
from dataiku.doctor.prediction.common import SGDRegressionHyperparametersSpace
from dataiku.doctor.prediction.common import SVMHyperparametersSpace
from dataiku.doctor.prediction.common import TrainableModel
from dataiku.doctor.prediction.common import TreesHyperparametersSpace
from dataiku.doctor.prediction.common import create_categorical_dimension
from dataiku.doctor.prediction.common import dump_pretrain_info
from dataiku.doctor.prediction.common import get_groups_for_hp_search_cv
from dataiku.doctor.prediction.common import get_initial_intrinsic_perf_data
from dataiku.doctor.prediction.common import get_max_features_dimension
from dataiku.doctor.prediction.common import get_selection_mode
from dataiku.doctor.prediction.common import get_svm_gamma_params_from_clf_params
from dataiku.doctor.prediction.common import prepare_multiframe
from dataiku.doctor.prediction.common import replace_value_by_empty
from dataiku.doctor.prediction.common import safe_del
from dataiku.doctor.prediction.common import safe_positive_int
from dataiku.doctor.prediction.common import scikit_model
from dataiku.doctor.prediction.deep_neural_network_prediction import DeepNeuralNetworkRegression
from dataiku.doctor.prediction.lars import DkuLassoLarsRegressor
from dataiku.doctor.prediction.lightgbm_prediction import LightGBMRegression
from dataiku.doctor.prediction.xgboost_trainable_model import XGBoostTrainableModel
from dataiku.doctor.utils import doctor_constants
from dataiku.doctor.utils.skcompat import dku_fit
from dataiku.doctor.utils.skcompat import gbt_skcompat_actual_params
from dataiku.doctor.utils.skcompat import gbt_skcompat_hp_space
from dataiku.doctor.utils.skcompat import sgd_skcompat_actual_params
from dataiku.doctor.utils.skcompat import sgd_skcompat_hp_space
from dataiku.doctor.utils.skcompat import SQUARED_LOSS_NAME
from dataiku.doctor.utils.gpu_execution import get_gpu_config_from_core_params, get_single_gpu_id_from_gpu_device, XGBOOSTGpuCapability

# Registry filled by ``register_regression_algorithm``: maps an algorithm identifier
# (the class-level ``algorithm`` attribute) to one instance of the algorithm class.
REGRESSION_ALGORITHMS = {}

# Module-level logger named after this module.
logger = logging.getLogger(__name__)


def register_regression_algorithm(algorithm):
    """Instantiate ``algorithm`` and store the instance in ``REGRESSION_ALGORITHMS``.

    :param algorithm: class that can be constructed without arguments and that exposes
        a class-level ``algorithm`` attribute, used as the registry key
    """
    instance = algorithm()
    REGRESSION_ALGORITHMS[algorithm.algorithm] = instance

##############################################################
# IMPORTANT
#    If you add any settings here, you MUST add them to
#    classification.tmpl / regression.tmpl for the notebook export
##############################################################


class ScikitRegression(ClassicalPredictionAlgorithm):
    """Regression algorithm whose estimator is produced by ``scikit_model`` from the modeling params."""
    algorithm = "SCIKIT_MODEL"

    def model_from_params(self, input_hp_space, modeling_params, core_params):
        """Return a ``TrainableModel`` wrapping the estimator built by ``scikit_model``."""
        return TrainableModel(
            scikit_model(modeling_params),
            hyperparameters_space=HyperparametersSpace.from_definition(input_hp_space)
        )

    def actual_params(self, ret, clf, fit_params):
        """Return ``ret`` as the resolved params, with no extra information."""
        return {"resolved": ret, "other": {}}

    def get_search_settings(self, hyperparameter_search_params, trainable_model):
        """Return the parent's search settings with ``n_iter`` forced to 1."""
        parent = super(ScikitRegression, self)
        settings = parent.get_search_settings(hyperparameter_search_params, trainable_model)
        # Force hyperparameter search size to 1
        settings.n_iter = 1
        return settings


register_regression_algorithm(ScikitRegression)


class DecisionTreeRegression(ClassicalPredictionAlgorithm):
    """Decision tree regressor searched over max_depth, min_samples_leaf and splitter."""
    algorithm = "DECISION_TREE_REGRESSION"

    def model_from_params(self, input_hp_space, modeling_params, core_params):
        """Return a ``TrainableModel`` around a ``DecisionTreeRegressor`` seeded with 1337."""
        dimension_classes = {
            "max_depth": IntegerHyperparameterDimension,
            "min_samples_leaf": IntegerHyperparameterDimension,
            "splitter": CategoricalHyperparameterDimension
        }
        search_space = HyperparametersSpace.from_definition(
            input_hp_space,
            hp_names_to_dimension_class=dimension_classes
        )
        tree = DecisionTreeRegressor(random_state=1337)
        return TrainableModel(tree, hyperparameters_space=search_space)

    def actual_params(self, ret, clf, fit_params):
        """Store the estimator's tree parameters under ``ret["dt"]`` and return the actual params."""
        result = {"resolved": ret, "other": {}}
        safe_del(ret, "dtc_classifier_grid")
        sk_params = clf.get_params()
        logger.info("DT params are %s " % sk_params)
        reported_names = ("max_depth", "criterion", "min_samples_leaf", "splitter")
        ret["dt"] = {name: sk_params[name] for name in reported_names}
        return result


register_regression_algorithm(DecisionTreeRegression)


class RFRegression(ClassicalPredictionAlgorithm):
    """Random forest regressor."""
    algorithm = "RANDOM_FOREST_REGRESSION"

    def model_from_params(self, input_hp_space, modeling_params, core_params):
        """Return a ``TrainableModel`` around a ``RandomForestRegressor`` seeded with 1337."""
        dimension_classes = {
            "min_samples_leaf": IntegerHyperparameterDimension,
            "n_estimators": IntegerHyperparameterDimension
        }
        # "max_tree_depth" is passed through replace_value_by_empty(..., value=0) before use
        explicit_dimensions = {
            "max_features": get_max_features_dimension(input_hp_space),
            "max_depth": IntegerHyperparameterDimension(
                replace_value_by_empty(input_hp_space["max_tree_depth"], value=0)
            )
        }
        search_space = TreesHyperparametersSpace.from_definition(
            input_hp_space,
            hp_names_to_dimension_class=dimension_classes,
            hp_names_to_dimension=explicit_dimensions
        )

        forest = RandomForestRegressor(random_state=1337, n_jobs=input_hp_space["n_jobs"], verbose=2)
        return TrainableModel(forest, hyperparameters_space=search_space)

    def actual_params(self, ret, clf, fit_params):
        """Store the fitted forest's parameters under ``ret["rf"]`` and return the actual params.

        ``min_samples_split`` is reported in the "other" section as ``rf_min_samples_split``.
        """
        result = {"resolved": ret, "other": {}}
        safe_del(ret, "rf_regressor_grid")
        sk_params = clf.get_params()
        logger.info("RF Params are %s " % sk_params)

        summary = {
            "estimators": len(clf.estimators_),
            # non-positive n_jobs values are reported as -1
            "njobs": sk_params["n_jobs"] if sk_params["n_jobs"] > 0 else -1,
            "max_tree_depth": sk_params["max_depth"],
            "min_samples_leaf": sk_params["min_samples_leaf"],
            "selection_mode": get_selection_mode(sk_params["max_features"]),
        }
        ret["rf"] = summary

        mode = summary["selection_mode"]
        if mode == "number":
            summary["max_features"] = sk_params["max_features"]
        elif mode == "prop":
            summary["max_feature_prop"] = sk_params["max_features"]

        result["other"]["rf_min_samples_split"] = sk_params["min_samples_split"]
        return result


register_regression_algorithm(RFRegression)


class ExtraTreesRegression(ClassicalPredictionAlgorithm):
    """Extra-trees regressor."""
    algorithm = "EXTRA_TREES"

    def model_from_params(self, input_hp_space, modeling_params, core_params):
        """Return a ``TrainableModel`` around an ``ExtraTreesRegressor`` seeded with 1337."""
        integer_dimensions = {
            "min_samples_leaf": IntegerHyperparameterDimension,
            "n_estimators": IntegerHyperparameterDimension
        }
        prebuilt_dimensions = {
            "max_features": get_max_features_dimension(input_hp_space),
            "max_depth": IntegerHyperparameterDimension(
                replace_value_by_empty(input_hp_space["max_tree_depth"], value=0)
            )
        }
        space = TreesHyperparametersSpace.from_definition(
            input_hp_space,
            hp_names_to_dimension_class=integer_dimensions,
            hp_names_to_dimension=prebuilt_dimensions
        )

        trees = ExtraTreesRegressor(random_state=1337, n_jobs=input_hp_space["n_jobs"], verbose=2)
        return TrainableModel(trees, hyperparameters_space=space)

    def actual_params(self, ret, clf, fit_params):
        """Store the fitted ensemble's parameters under ``ret["extra_trees"]`` and return the actual params.

        ``min_samples_split`` is reported in the "other" section under the ``rf_min_samples_split`` key.
        """
        output = {"resolved": ret, "other": {}}
        safe_del(ret, "extra_trees_grid")
        fitted_params = clf.get_params()
        logger.info("Extra trees Params are %s " % fitted_params)

        section = {
            "estimators": len(clf.estimators_),
            # non-positive n_jobs values are reported as -1
            "njobs": fitted_params["n_jobs"] if fitted_params["n_jobs"] > 0 else -1,
            "max_tree_depth": fitted_params["max_depth"],
            "min_samples_leaf": fitted_params["min_samples_leaf"],
            "selection_mode": get_selection_mode(fitted_params["max_features"]),
        }
        ret["extra_trees"] = section

        selection_mode = section["selection_mode"]
        if selection_mode == "number":
            section["max_features"] = fitted_params["max_features"]
        elif selection_mode == "prop":
            section["max_feature_prop"] = fitted_params["max_features"]

        output["other"]["rf_min_samples_split"] = fitted_params["min_samples_split"]
        return output


register_regression_algorithm(ExtraTreesRegression)


class GBTRegression(ClassicalPredictionAlgorithm):
    """Gradient boosted trees regressor."""
    algorithm = "GBT_REGRESSION"

    def model_from_params(self, input_hp_space, modeling_params, core_params):
        """Return a ``TrainableModel`` around a ``GradientBoostingRegressor`` seeded with 1337."""
        # Called for its effect on input_hp_space; the return value is not used
        gbt_skcompat_hp_space(input_hp_space)

        dimension_classes = {
            "min_samples_leaf": IntegerHyperparameterDimension,
            "n_estimators": IntegerHyperparameterDimension,
            "learning_rate": FloatHyperparameterDimension,
            "loss": CategoricalHyperparameterDimension,
            "max_depth": IntegerHyperparameterDimension
        }
        prebuilt_dimensions = {
            "max_features": get_max_features_dimension(input_hp_space)
        }
        search_space = HyperparametersSpace.from_definition(
            input_hp_space,
            hp_names_to_dimension_class=dimension_classes,
            hp_names_to_dimension=prebuilt_dimensions
        )

        booster = GradientBoostingRegressor(random_state=1337, verbose=1)
        return TrainableModel(booster, hyperparameters_space=search_space)

    def actual_params(self, ret, clf, fit_params):
        """Store the fitted model's parameters under ``ret["gbt"]`` and return the actual params.

        ``ret["gbt"]`` is passed to ``gbt_skcompat_actual_params`` before returning.
        """
        result = {"resolved": ret, "other": {}}
        safe_del(ret, "gbt_regressor_grid")
        sk_params = clf.get_params()
        logger.info("GBT Params are %s " % sk_params)

        summary = {
            "n_estimators": len(clf.estimators_),
            "max_depth": sk_params["max_depth"],
            "learning_rate": sk_params["learning_rate"],
            "min_samples_leaf": sk_params["min_samples_leaf"],
            "selection_mode": get_selection_mode(sk_params["max_features"]),
            "loss": sk_params["loss"]
        }
        ret["gbt"] = summary

        mode = summary["selection_mode"]
        if mode == "number":
            summary["max_features"] = sk_params["max_features"]
        elif mode == "prop":
            summary["max_feature_prop"] = sk_params["max_features"]

        gbt_skcompat_actual_params(ret["gbt"])
        return result


register_regression_algorithm(GBTRegression)


class KNNRegression(ClassicalPredictionAlgorithm):
    """K-nearest-neighbors regressor; trained without sample weights."""
    algorithm = "KNN"

    def model_from_params(self, input_hp_space, modeling_params, core_params):
        """Return a ``TrainableModel`` around a ``KNeighborsRegressor`` searched over ``n_neighbors``."""
        neighbors_dimension = IntegerHyperparameterDimension(input_hp_space["k"])
        search_space = HyperparametersSpace.from_definition(
            input_hp_space,
            hp_names_to_dimension={"n_neighbors": neighbors_dimension}
        )

        # The boolean "distance_weighting" setting selects the sklearn ``weights`` mode
        if input_hp_space["distance_weighting"]:
            weights = "distance"
        else:
            weights = "uniform"

        knn = KNeighborsRegressor(
            weights=weights,
            algorithm=input_hp_space["algorithm"],
            leaf_size=input_hp_space["leaf_size"],
            p=input_hp_space["p"]
        )
        return TrainableModel(knn, hyperparameters_space=search_space, supports_sample_weights=False)

    def actual_params(self, ret, clf, fit_params):
        """Store the selected KNN parameters under ``ret["knn"]`` and return the actual params."""
        result = {"resolved": ret, "other": {}}
        sk_params = clf.get_params()
        logger.info("Selected KNN Params are %s " % sk_params)
        safe_del(ret, "knn_grid")

        summary = {"k": sk_params["n_neighbors"]}
        summary["distance_weighting"] = sk_params["weights"] == "distance"
        for name in ("algorithm", "p", "leaf_size"):
            summary[name] = sk_params[name]
        ret["knn"] = summary
        return result


register_regression_algorithm(KNNRegression)


class NeuralNetworkRegression(ClassicalPredictionAlgorithm):
    """Multi-layer perceptron regressor; trained without sample weights."""
    algorithm = "NEURAL_NETWORK"

    def model_from_params(self, input_hp_space, modeling_params, core_params):
        """Return a ``TrainableModel`` around an ``MLPRegressor`` searched over ``hidden_layer_sizes``."""
        layers_dimension = IntegerHyperparameterDimension(input_hp_space["layer_sizes"])
        search_space = HyperparametersSpace.from_definition(
            input_hp_space,
            hp_names_to_dimension={'hidden_layer_sizes': layers_dimension}
        )

        # Settings are read in the same order as the keyword arguments are listed
        mlp_kwargs = {}
        for name in ("activation", "solver", "alpha"):
            mlp_kwargs[name] = input_hp_space[name]

        if input_hp_space["auto_batch"]:
            mlp_kwargs["batch_size"] = "auto"
        else:
            mlp_kwargs["batch_size"] = input_hp_space["batch_size"]

        mlp_kwargs["max_iter"] = input_hp_space["max_iter"]
        mlp_kwargs["random_state"] = input_hp_space["seed"]

        same_name_settings = (
            "tol", "early_stopping", "validation_fraction", "beta_1", "beta_2", "epsilon",
            "learning_rate", "power_t", "momentum", "nesterovs_momentum", "shuffle", "learning_rate_init"
        )
        for name in same_name_settings:
            mlp_kwargs[name] = input_hp_space[name]

        network = MLPRegressor(**mlp_kwargs)
        return TrainableModel(network, hyperparameters_space=search_space, supports_sample_weights=False)

    def actual_params(self, ret, clf, fit_params):
        """Store the selected layer sizes under ``ret["neural_network"]`` and return the actual params."""
        result = {"resolved": ret, "other": {}}
        sk_params = clf.get_params()
        logger.info("Neural Network Params are %s " % sk_params)

        ret["neural_network"] = {"layer_sizes": sk_params["hidden_layer_sizes"]}
        return result


register_regression_algorithm(NeuralNetworkRegression)


class XGBoostRegression(ClassicalPredictionAlgorithm):
    """XGBoost regressor; the xgboost helper module is imported lazily in ``model_from_params``."""
    algorithm = "XGBOOST_REGRESSION"

    def model_from_params(self, input_hp_space, modeling_params, core_params):
        """Build the XGBoost trainable model.

        :param input_hp_space: hyperparameter space definition and fixed settings for XGBoost
        :param modeling_params: modeling params; ``["metrics"]["evaluationMetric"]`` is read
        :param core_params: core params; used for the GPU configuration and the prediction type
        :return: an ``XGBoostTrainableModel``
        :raises Exception: if the xgboost helpers cannot be imported, or if ``n_estimators`` is not positive
        """
        try:
            from dataiku.doctor.prediction.dku_xgboost import build_objective_string
            from dataiku.doctor.prediction.dku_xgboost import expand_tree_method_for_xgboost
            from dataiku.doctor.prediction.dku_xgboost import instantiate_xgb_regressor
        except Exception:
            # Not a bare ``except``: KeyboardInterrupt / SystemExit must propagate unchanged
            # instead of being reported as an XGBoost loading failure.
            logger.error("Failed to load xgboost package")
            traceback.print_exc()
            raise Exception("Failed to load XGBoost package")

        n_estimators = input_hp_space['n_estimators']
        if n_estimators <= 0:  # xgboost does not fail gracefully then
            raise Exception("The number of estimators must be a positive number")

        nthread = safe_positive_int(input_hp_space['nthread'])
        # When imputation is disabled, NaN is used as the "missing" marker
        missing = input_hp_space['missing'] if input_hp_space['impute_missing'] else np.nan

        # Fall back to "gbtree" when no booster is enabled
        booster = CategoricalHyperparameterDimension(input_hp_space["booster"])
        if len(booster._get_enabled_values_list()) == 0:
            booster = create_categorical_dimension(["gbtree"])

        # Enabled objectives are converted with build_objective_string; fall back to "reg_linear" when none is enabled
        objective = CategoricalHyperparameterDimension(input_hp_space["objective"])
        enabled_val_list = [build_objective_string(val) for val in objective._get_enabled_values_list()]
        enabled_val_dict = create_categorical_dimension(enabled_val_list)._get_values()
        objective.set_values(enabled_val_dict)
        if len(objective._get_enabled_values_list()) == 0:
            objective = create_categorical_dimension([build_objective_string("reg_linear")])

        hp_space = HyperparametersSpace.from_definition(
            input_hp_space,
            hp_names_to_dimension_class={
                "max_depth": IntegerHyperparameterDimension,
                "learning_rate": FloatHyperparameterDimension,
                "gamma": FloatHyperparameterDimension,
                "min_child_weight": FloatHyperparameterDimension,
                "max_delta_step": FloatHyperparameterDimension,
                "subsample": FloatHyperparameterDimension,
                "colsample_bytree": FloatHyperparameterDimension,
                "colsample_bylevel": FloatHyperparameterDimension
            },
            hp_names_to_dimension={
                # DSS settings "alpha" / "lambda" map to xgboost "reg_alpha" / "reg_lambda"
                "reg_alpha": FloatHyperparameterDimension(input_hp_space['alpha']),
                "reg_lambda": FloatHyperparameterDimension(input_hp_space['lambda']),
                "booster": booster,
                "objective": objective
            }
        )

        gpu_config = get_gpu_config_from_core_params(core_params)

        estimator = instantiate_xgb_regressor(
            n_estimators=n_estimators,
            silent=0,
            n_jobs=nthread,
            random_state=input_hp_space['seed'],
            missing=missing,
            scale_pos_weight=input_hp_space['scale_pos_weight'],
            base_score=input_hp_space['base_score'],
            tree_method=expand_tree_method_for_xgboost(input_hp_space, gpu_config),
            tweedie_variance_power=input_hp_space['tweedie_variance_power']
        )

        # Only set gpu_id when the resolved device is not the CPU
        device = XGBOOSTGpuCapability.get_device(gpu_config)
        if device != "cpu":
            estimator.set_params(gpu_id=get_single_gpu_id_from_gpu_device(device))

        prediction_type = core_params[doctor_constants.PREDICTION_TYPE]
        is_causal = prediction_type == doctor_constants.CAUSAL_REGRESSION

        return XGBoostTrainableModel(
            estimator,
            hyperparameters_space=hp_space,
            # Early stopping is never enabled for causal regression
            is_early_stopping_enabled=(not is_causal and input_hp_space['enable_early_stopping']),
            early_stopping_rounds=input_hp_space['early_stopping_rounds'],
            evaluation_metric_name=modeling_params["metrics"]["evaluationMetric"],
            prediction_type=prediction_type
        )

    def actual_params(self, ret, clf, fit_params):
        """Store the selected XGBoost parameters under ``ret["xgboost"]`` and return the actual params.

        :param ret: modeling params dict, updated in place
        :param clf: fitted estimator exposing ``get_params()``
        :param fit_params: final fit parameters; ``early_stopping_rounds`` is read from it
        """
        amp = {"resolved": ret, "other": {}}
        params = clf.get_params()

        # We serialize np.nan missing parameter as None in actual params
        missing = None if pd.isna(params["missing"]) else params["missing"]

        logger.info("Selected XGBoost Params are %s " % params)
        safe_del(ret, "xgboost_grid")
        ret["xgboost"] = {}
        ret["xgboost"]["max_depth"] = params["max_depth"]
        ret["xgboost"]["learning_rate"] = params["learning_rate"]
        ret["xgboost"]["n_estimators"] = params["n_estimators"]
        # non-positive n_jobs values are reported as -1
        ret["xgboost"]["nthread"] = params["n_jobs"] if params["n_jobs"] > 0 else -1
        ret["xgboost"]["gamma"] = params["gamma"]
        ret["xgboost"]["min_child_weight"] = params["min_child_weight"]
        ret["xgboost"]["max_delta_step"] = params["max_delta_step"]
        ret["xgboost"]["subsample"] = params["subsample"]
        ret["xgboost"]["colsample_bytree"] = params["colsample_bytree"]
        ret["xgboost"]["colsample_bylevel"] = params["colsample_bylevel"]
        ret["xgboost"]["alpha"] = params["reg_alpha"]
        ret["xgboost"]["lambda"] = params["reg_lambda"]
        ret["xgboost"]["seed"] = params["random_state"]
        ret["xgboost"]["impute_missing"] = missing is not None
        ret["xgboost"]["missing"] = missing
        ret["xgboost"]["base_score"] = params["base_score"]
        ret["xgboost"]["scale_pos_weight"] = params["scale_pos_weight"]
        ret["xgboost"]["enable_early_stopping"] = fit_params.get('early_stopping_rounds') is not None
        ret["xgboost"]["early_stopping_rounds"] = fit_params.get('early_stopping_rounds')
        ret["xgboost"]["booster"] = params.get("booster")
        # Objective is reported with "_" in place of ":"
        ret["xgboost"]["objective"] = params.get("objective").replace(":", "_")
        ret["xgboost"]["tweedie_variance_power"] = params.get("tweedie_variance_power")
        return amp


register_regression_algorithm(XGBoostRegression)


class LARSRegression(ClassicalPredictionAlgorithm):
    """Lasso-LARS regressor with an empty hyperparameter space; trained without sample weights."""
    algorithm = "LARS"

    def model_from_params(self, input_hp_space, modeling_params, core_params):
        """Return a ``TrainableModel`` around a ``DkuLassoLarsRegressor``.

        ``max_var`` is read from ``modeling_params["lars_grid"]["max_features"]``.
        """
        empty_space = HyperparametersSpace({})
        max_features = modeling_params["lars_grid"]["max_features"]
        lars = DkuLassoLarsRegressor(max_var=max_features)
        return TrainableModel(lars, hyperparameters_space=empty_space, supports_sample_weights=False)

    def actual_params(self, ret, clf, fit_params):
        """Return ``ret`` as the resolved params, with no extra information."""
        return {"resolved": ret, "other": {}}


register_regression_algorithm(LARSRegression)


class SVMRegression(ClassicalPredictionAlgorithm):
    """Support vector regressor."""
    algorithm = "SVM_REGRESSION"

    def model_from_params(self, input_hp_space, modeling_params, core_params):
        """Return a ``TrainableModel`` around an ``SVR``.

        The gamma dimensions are only part of the search space when at least one of the
        "rbf", "sigmoid" or "poly" kernels is enabled.
        """
        kernel_values = input_hp_space["kernel"]["values"]
        uses_gamma = any(kernel_values[name]["enabled"] for name in ["rbf", "sigmoid", "poly"])

        if uses_gamma:
            search_space = SVMHyperparametersSpace.from_definition(
                input_hp_space,
                hp_names_to_dimension_class={
                    "C": FloatHyperparameterDimension,
                    "gamma": CategoricalHyperparameterDimension,
                    "custom_gamma": FloatHyperparameterDimension,
                    "kernel": CategoricalHyperparameterDimension
                }
            )
        else:
            search_space = HyperparametersSpace.from_definition(
                input_hp_space,
                hp_names_to_dimension_class={
                    "C": FloatHyperparameterDimension,
                    "kernel": CategoricalHyperparameterDimension
                }
            )

        svr = SVR(
            coef0=input_hp_space['coef0'],
            tol=input_hp_space['tol'],
            max_iter=input_hp_space['max_iter']
        )
        return TrainableModel(svr, hyperparameters_space=search_space)

    def actual_params(self, ret, clf, fit_params):
        """Store the selected SVM parameters under ``ret["svm"]`` and return the actual params.

        Gamma-related entries come from ``get_svm_gamma_params_from_clf_params``.
        """
        result = {"resolved": ret, "other": {}}
        sk_params = clf.get_params()
        logger.info("Selected SVM Params are %s " % sk_params)
        safe_del(ret, "svr_grid")

        reported_names = ("C", "kernel", "tol", "max_iter", "coef0")
        ret["svm"] = {name: sk_params[name] for name in reported_names}
        gamma_params = get_svm_gamma_params_from_clf_params(sk_params)
        ret["svm"].update(gamma_params)
        return result


register_regression_algorithm(SVMRegression)


class SGDRegression(ClassicalPredictionAlgorithm):
    """Stochastic gradient descent regressor supporting the squared and huber losses."""
    algorithm = "SGD_REGRESSION"

    def model_from_params(self, input_hp_space, modeling_params, core_params):
        """Build the SGD trainable model.

        The search space depends on which of the squared / huber losses are enabled:
        ``epsilon`` is only searched when huber is enabled, and ``loss`` is a fixed
        single-value dimension when only one loss is enabled.

        :raises ValueError: if neither the squared loss nor the huber loss is enabled
        """
        #TODO: elastic-net elastic_net, elasticnet
        # The compat helper returns the hp space to use from here on
        input_hp_space = sgd_skcompat_hp_space(input_hp_space)
        squared_loss_enabled = input_hp_space["loss"]["values"][SQUARED_LOSS_NAME]["enabled"]
        huber_loss_enabled = input_hp_space["loss"]["values"]["huber"]["enabled"]

        if squared_loss_enabled and not huber_loss_enabled:
            hp_space = HyperparametersSpace.from_definition(
                input_hp_space,
                hp_names_to_dimension_class={
                    "alpha": FloatHyperparameterDimension,
                    "penalty": CategoricalHyperparameterDimension
                },
                hp_names_to_dimension={
                    "loss": create_categorical_dimension([SQUARED_LOSS_NAME])
                }
            )

        elif huber_loss_enabled and not squared_loss_enabled:
            hp_space = HyperparametersSpace.from_definition(
                input_hp_space,
                hp_names_to_dimension_class={
                    "alpha": FloatHyperparameterDimension,
                    "penalty": CategoricalHyperparameterDimension,
                    "epsilon": FloatHyperparameterDimension
                },
                hp_names_to_dimension={
                    "loss": create_categorical_dimension(["huber"])
                }
            )

        elif huber_loss_enabled and squared_loss_enabled:
            hp_space = SGDRegressionHyperparametersSpace.from_definition(
                input_hp_space,
                hp_names_to_dimension_class={
                    "alpha": FloatHyperparameterDimension,
                    "penalty": CategoricalHyperparameterDimension,
                    "loss": CategoricalHyperparameterDimension,
                    "epsilon": FloatHyperparameterDimension
                }
            )
        else:
            # The trailing space after "for" keeps the two concatenated literals from running together
            raise ValueError("Training failed, you must at least select one loss among 'huber' and 'squared_loss' for "
                             "Stochastic Gradient Descent regression")

        estimator = SGDRegressor(
            l1_ratio=input_hp_space["l1_ratio"],
            shuffle=True,
            max_iter=input_hp_space["max_iter"],
            tol=input_hp_space["tol"],
            verbose=2,
            random_state=1337
        )

        return TrainableModel(estimator, hyperparameters_space=hp_space)

    def actual_params(self, ret, clf, fit_params):
        """Store the selected SGD parameters under ``ret["sgd"]`` and return the actual params.

        ``n_iter`` is taken from the fitted estimator's ``n_iter_`` attribute; ``ret["sgd"]``
        is passed to ``sgd_skcompat_actual_params`` before returning.
        """
        amp = {"resolved": ret, "other": {}}
        params = clf.get_params()
        logger.info("Selected SGD Params are %s " % params)
        safe_del(ret, "sgd_grid")
        ret["sgd"] = {
            "loss": params["loss"],
            "penalty": params["penalty"],
            "alpha": params["alpha"],
            "l1_ratio": params["l1_ratio"],
            "epsilon": params["epsilon"],
            "n_iter": clf.n_iter_
        }
        sgd_skcompat_actual_params(ret["sgd"])
        return amp


register_regression_algorithm(SGDRegression)


class RidgeRegression(ClassicalPredictionAlgorithm):
    """Ridge regression, with either a searched alpha or ``RidgeCV`` when ``alphaMode`` is "AUTO"."""
    algorithm = "RIDGE_REGRESSION"

    def model_from_params(self, input_hp_space, modeling_params, core_params):
        """Return a ``TrainableModel`` around ``RidgeCV`` ("AUTO" mode) or ``Ridge`` (any other mode)."""
        if input_hp_space.get("alphaMode", None) != "AUTO":
            search_space = HyperparametersSpace.from_definition(
                input_hp_space,
                hp_names_to_dimension_class={
                    "alpha": FloatHyperparameterDimension
                }
            )
            ridge = Ridge(fit_intercept=True, random_state=1337)
        else:
            # Nothing to search in "AUTO" mode: the space is empty
            search_space = HyperparametersSpace({})
            ridge = RidgeCV(fit_intercept=True)
        return TrainableModel(ridge, hyperparameters_space=search_space)

    def actual_params(self, ret, clf, fit_params):
        """Store the alpha under ``ret["ridge"]`` and return the actual params.

        The estimator's ``alpha`` parameter is used when present; otherwise the fitted
        ``alpha_`` attribute when it exists, else 0.
        """
        result = {"resolved": ret, "other": {}}
        sk_params = clf.get_params()
        safe_del(ret, "ridge_grid")

        ret["ridge"] = {}
        fallback_alpha = clf.alpha_ if hasattr(clf, "alpha_") else 0
        ret["ridge"]["alpha"] = sk_params.get("alpha", fallback_alpha)

        return result


register_regression_algorithm(RidgeRegression)


class LassoRegression(ClassicalPredictionAlgorithm):
    """Lasso regression with a searched alpha or automatic alpha selection; trained without sample weights."""
    algorithm = "LASSO_REGRESSION"

    def model_from_params(self, input_hp_space, modeling_params, core_params):
        """Build the Lasso trainable model according to ``alphaMode``.

        * "AUTO_CV": ``LassoCV`` with 3 folds and an empty search space
        * "AUTO_IC": ``LassoLarsIC`` with an empty search space
        * anything else: ``Lasso`` with ``alpha`` as a float search dimension
        """
        alpha_mode = input_hp_space.get("alphaMode", None)

        if alpha_mode == "AUTO_CV":
            hp_space = HyperparametersSpace({})
            estimator = LassoCV(fit_intercept=True, cv=3)
        elif alpha_mode == "AUTO_IC":
            hp_space = HyperparametersSpace({})
            estimator = LassoLarsIC(fit_intercept=True, verbose=3)
        else:
            hp_space = HyperparametersSpace.from_definition(
                input_hp_space,
                hp_names_to_dimension_class={
                    "alpha": FloatHyperparameterDimension
                }
            )
            estimator = Lasso(fit_intercept=True)

        return TrainableModel(estimator, hyperparameters_space=hp_space, supports_sample_weights=False)

    def actual_params(self, ret, clf, fit_params):
        """Store the alpha under ``ret["lasso"]`` and return the actual params.

        Precedence: the estimator's ``alpha`` parameter, then the fitted ``alpha_``
        attribute, then 0 when neither is available.
        """
        amp = {"resolved": ret, "other": {}}
        # NOTE(review): this removes "ridge_grid", not a lasso-specific key; it looks like a
        # copy-paste from RidgeRegression. Kept unchanged; confirm the intended key.
        safe_del(ret, "ridge_grid")
        params = clf.get_params()

        # A single if/elif/else chain, so an explicit alpha is never overwritten by the 0 fallback
        if "alpha" in params:
            alpha = params["alpha"]
        elif hasattr(clf, "alpha_"):
            alpha = clf.alpha_
        else:
            alpha = 0

        ret["lasso"] = {"alpha": alpha}
        return amp


register_regression_algorithm(LassoRegression)


class LeastSquareRegression(ClassicalPredictionAlgorithm):
    """Ordinary least squares regression with an empty hyperparameter space."""
    algorithm = "LEASTSQUARE_REGRESSION"

    def model_from_params(self, input_hp_space, modeling_params, core_params):
        """Return a ``TrainableModel`` around a ``LinearRegression`` using the configured ``n_jobs``."""
        empty_space = HyperparametersSpace({})
        ols = LinearRegression(fit_intercept=True, n_jobs=input_hp_space['n_jobs'])
        return TrainableModel(ols, hyperparameters_space=empty_space)

    def actual_params(self, ret, clf, fit_params):
        """Copy the estimator's ``n_jobs`` (when present) into ``ret`` and return the actual params."""
        result = {"resolved": ret, "other": {}}
        sk_params = clf.get_params()
        logger.info("Selected Ordinary Least Squares Params are %s " % sk_params)
        safe_del(ret, "least_squares")
        if "n_jobs" in sk_params:
            ret["n_jobs"] = sk_params["n_jobs"]
        return result


register_regression_algorithm(LeastSquareRegression)

# Algorithms implemented in other modules (imported at the top of this file)
# are added to the same registry.
register_regression_algorithm(PluginPredictionAlgorithm)

register_regression_algorithm(LightGBMRegression)

register_regression_algorithm(DeepNeuralNetworkRegression)

def regression_fit_ensemble(modeling_params, core_params, train_X, train_y, sample_weight=None):
    """
    Fits an EnsembleRegressor built from modeling_params["ensemble_params"] and core_params.
    Returns (clf, actual_params, train_X, initial_intrinsic_perf_data), where actual_params only
    holds the "resolved" modeling params and initial_intrinsic_perf_data is an empty dict.
    """
    # Imported here to avoid circular imports
    from dataiku.doctor.prediction.ensembles import EnsembleRegressor

    logger.info("Fitting ensemble model")
    ensemble = EnsembleRegressor(modeling_params["ensemble_params"], core_params)
    fitted = ensemble.fit(train_X, train_y, sample_weight=sample_weight)

    return fitted, {"resolved": modeling_params}, train_X, {}


def regression_fit_single(modeling_params, core_params, transformed_train, model_folder_context=None,
                          gridsearch_done_fn=None, with_sample_weight=False, monotonic_cst=None):
    """
    Runs the hyperparameter search for modeling_params['algorithm'], then fits the best estimator.
    Returns (clf, actual_params, prepared_train_X, initial_intrinsic_perf_data)
    Raises an Exception when the algorithm is not registered in REGRESSION_ALGORITHMS.
    gridsearch_done_fn, when given, is called once the best estimator has been retrieved.
    """
    raw_train = transformed_train["TRAIN"]
    # Column labels are collected before the multiframe is prepared
    column_labels = list(raw_train.columns())
    train_y = transformed_train["target"]
    train_X, is_sparse = prepare_multiframe(raw_train, modeling_params)

    algorithm_name = modeling_params['algorithm']
    if algorithm_name not in REGRESSION_ALGORITHMS:
        raise Exception("Algorithm not available in Python: %s" % algorithm_name)
    algorithm = REGRESSION_ALGORITHMS[algorithm_name]

    search_runner = algorithm.get_search_runner(core_params, modeling_params, column_labels=column_labels,
                                                model_folder_context=model_folder_context,
                                                unprocessed=transformed_train["UNPROCESSED"])

    # Sample weights are only read when requested
    train_w = np.array(transformed_train["weight"]) if with_sample_weight else None

    groups = get_groups_for_hp_search_cv(modeling_params, transformed_train)

    # grid searcher figures out whether or not the algorithm supports sample weights
    search_runner.initialize_search_context(train_X, train_y,
                                            groups=groups,
                                            sample_weight=train_w,
                                            monotonic_cst=monotonic_cst)
    clf = search_runner.get_best_estimator()

    if gridsearch_done_fn:
        gridsearch_done_fn()

    dump_pretrain_info(clf, train_X, train_y, train_w)

    # Final fit of the best estimator
    final_fit_parameters = search_runner.get_final_fit_parameters(sample_weight=train_w)
    dku_fit(clf, train_X, train_y, **final_fit_parameters)

    perf_data = get_initial_intrinsic_perf_data(train_X, is_sparse)

    # Score information is only added when a search actually ran
    if not search_runner.search_skipped():
        perf_data.update(search_runner.get_score_info())

    # get_actual_params performs the translation sklearn params (after refit) (e.g. n_estimators)
    # to DSS(raw) params (e.g rf_n_estimators)
    actual_params = algorithm.get_actual_params(modeling_params, clf, final_fit_parameters)
    logger.info("Output params are %s" % actual_params)

    return clf, actual_params, train_X, perf_data
