# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu
from feature_selection import LassoSelection, ForwardStepwiseSelection, TreeSelection

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Resolve the current project through the API client and load its variables.
project_key = dataiku.get_custom_variables()["projectKey"]
client = dataiku.api_client()
project = client.get_project(project_key)
variables = project.get_variables()

# The feature-selection settings are read from variables['standard'].
standard_settings = variables['standard']
nb_features, method, metric = (
    standard_settings[setting_name]
    for setting_name in ('nb_features', 'feature_selection_method', 'feature_selection_metric')
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Id of the saved model the feature selection is run against; it is passed to
# project.get_saved_model() here and to the selection classes further down.
saved_model_id = 'K26EwRgD'
saved_model = project.get_saved_model(saved_model_id)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# ML task the saved model originates from; its analysis_id and mltask_id are
# handed to the selection classes below.
ml_task = saved_model.get_origin_ml_task()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Run the configured feature-selection method, then build `features`
# (columns: 'feature', 'rank') from the active version of the saved model.
# Raises ValueError when `method` is not 'tree', 'forward' or 'lasso'.
if method == 'tree':
    selection = TreeSelection(analysis_id=ml_task.analysis_id, ml_task_id=ml_task.mltask_id,
                              saved_model_id=saved_model_id)
    selection.launch_selection()
    model_details = saved_model.get_version_details(saved_model.get_active_version()['id'])
    raw_importance = model_details.get_raw()['iperf']['rawImportance']

    # Names containing ':' are reduced to their second ':'-separated field so
    # that several entries sharing that field are summed into one importance
    # (presumably the source column of a derived feature -- TODO confirm).
    variable_importance = {}
    for raw_name, importance in zip(raw_importance['variables'], raw_importance['importances']):
        name = raw_name.split(':')[1] if ':' in raw_name else raw_name
        variable_importance[name] = variable_importance.get(name, 0) + importance

    # Keep the nb_features largest importances. Slicing a sorted list (instead
    # of thresholding on the value at index nb_features) cannot raise
    # IndexError when nb_features >= number of variables, and ties at the
    # cut-off no longer shrink the selection below nb_features.
    ranked = sorted(variable_importance, key=variable_importance.get, reverse=True)
    kept = set(ranked[:nb_features])
    # Emit rows in the order the variables first appeared in the model output.
    top_variables = [name for name in variable_importance if name in kept]
    features = pd.DataFrame({'feature': top_variables,
                             'rank': [variable_importance[name] for name in top_variables]})
else:
    if method == 'forward':
        selection = ForwardStepwiseSelection(metric=metric, analysis_id=ml_task.analysis_id,
                                             ml_task_id=ml_task.mltask_id, saved_model_id=saved_model_id,
                                             nb_features=nb_features)
        selection.launch_selection()

    elif method == 'lasso':
        selection = LassoSelection(analysis_id=ml_task.analysis_id, ml_task_id=ml_task.mltask_id,
                                   saved_model_id=saved_model_id, nb_features=nb_features)
        selection.launch_selection()

    else:
        raise ValueError('Method {} not supported'.format(method))

    # Re-fetch the handle with the same id the selection was launched with,
    # rather than repeating the literal id.
    saved_model = project.get_saved_model(saved_model_id)
    model_details = saved_model.get_version_details(saved_model.get_active_version()['id'])

    coefs = model_details.get_raw()['iperf']['lmCoefficients']

    # One entry per variable name; a repeated name keeps its last coefficient.
    variables_coefs = dict(zip(coefs['variables'], coefs['coefs']))

    # -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
    # The rank of a feature is the absolute value of its coefficient.
    features = pd.DataFrame({'feature': list(variables_coefs),
                             'rank': [abs(coef) for coef in variables_coefs.values()]})

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Write recipe outputs: store the `features` frame built above in the
# "selected_features" dataset, using the frame's own schema.
selected_features = dataiku.Dataset("selected_features")
selected_features.write_with_schema(features)