# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
import dataiku
import pandas as pd, numpy as np
from pandas.io.json import json_normalize
from dataiku import pandasutils as pdu
import json

# Read the scored dataset (it carries an 'explanations' column) into a
# pandas DataFrame.
df = dataiku.Dataset(
    "process-data-joined-new_prepared_24hfiltered_scored"
).get_dataframe()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Unnest explanations:
# 1. parse every 'explanations' cell with json.loads,
# 2. flatten the parsed objects into one column per key, each column name
#    prefixed with 'explanations_',
# 3. join those columns back onto df (index-aligned) and drop the raw
#    'explanations' column.
parsed_explanations = df['explanations'].map(json.loads).tolist()
explanations_flat = json_normalize(parsed_explanations).add_prefix('explanations_')
df = df.join(explanations_flat).drop('explanations', axis=1)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Restrict df to the campaign identifier, the recipe and every flattened
# explanation column (recognised by its 'explanations_' prefix).
explanations_columns = [
    name for name in df.columns if name.startswith('explanations_')
]
columns_to_keep = ['campaignID', 'Recipe'] + explanations_columns

df = df[columns_to_keep]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Create dataframe to store (absolute) influencing factors:
# one row per explanation column, holding the factor name (the column name
# without its 'explanations_' prefix) and the sum of absolute explanation
# values for that column, rounded to 2 decimals.
explanations_prefix = 'explanations_'
factor_rows = []
for col in explanations_columns:
    # df is updated in place with absolute values, as before, in case later
    # code reads it.
    df[col] = df[col].abs()
    factor_rows.append({
        # Slice the prefix off instead of splitting on it, so a factor whose
        # own name contains 'explanations_' is not truncated.
        'factor': col[len(explanations_prefix):],
        'value': round(df[col].sum(), 2),
    })

# Build the frame once from the collected rows: DataFrame.append copied the
# whole frame on every call and no longer exists in pandas >= 2.0.
# NOTE(review): 'value' is inferred directly from the numeric totals here;
# confirm the output dataset schema is still what downstream expects.
df2 = pd.DataFrame(factor_rows, columns=['factor', 'value'])

# Sort dataframe by most influencing factors
df2 = df2.sort_values(by='value', ascending=False).reset_index(drop=True)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Store the 3 most influencing factors in the "standard" project variables
# as factor_1, factor_2 and factor_3 (taken from the first three rows of df2).

# Get project variables
p = dataiku.Project()  # project handle
variables = p.get_variables()  # variables as a dictionary

# Positional lookup: raises IndexError when df2 holds fewer than 3 rows,
# in which case nothing is written back.
top_factors = [df2.iloc[position]["factor"] for position in range(3)]

for rank, factor in enumerate(top_factors, start=1):
    variables["standard"]["factor_{}".format(rank)] = factor

p.set_variables(variables)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Write df2 (factor / value table) to the "24h_explanations" output dataset,
# letting the dataset schema be set from the dataframe.
dataiku.Dataset("24h_explanations").write_with_schema(df2)