# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu

# Recipe inputs: open a handle on each input dataset, then load each one
# into a pandas DataFrame.
Sales_marketing_distinct_acc_filtered = dataiku.Dataset(
    "Sales_marketing_distinct_acc_filtered"
)
brand_adoption_by_acc = dataiku.Dataset("brand_adoption_by_acc")

Sales_marketing_distinct_acc_filtered_df = (
    Sales_marketing_distinct_acc_filtered.get_dataframe()
)
brand_adoption_by_acc_df = brand_adoption_by_acc.get_dataframe()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Transpose the dataset so that each input column becomes one row.
# After the flip, the former column labels sit in the index; reset_index()
# moves them into a regular column, which is then named "feature", while the
# column labelled 0 is named "value".
# NOTE(review): only the column labelled 0 is renamed, so any additional
# input rows would keep their integer labels -- confirm the input is
# expected to hold a single row.
transposed_data = (
    Sales_marketing_distinct_acc_filtered_df
    .transpose()
    .reset_index()
    .rename(columns={"index": "feature", 0: "value"})
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Attach each transposed feature row to the matching brand-adoption rows.
# The join pairs Feature_key (left) with feature (right); being an inner
# join, rows without a counterpart on the other side are dropped.
merged_data = brand_adoption_by_acc_df[
    [
        'proba_0',
        'proba_1',
        'prediction',
        'Feature_key',
        'SHAP_val',
        'sign',
    ]
].merge(
    transposed_data,
    how='inner',
    left_on='Feature_key',
    right_on='feature',
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Force the "value" column to be numeric: entries that cannot be parsed as a
# number are coerced to NaN, and every NaN (including values that were
# already missing) is then replaced with 0.
merged_data['value'] = pd.to_numeric(
    merged_data['value'],
    errors='coerce',
).fillna(0)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Recipe outputs: open a handle on each output dataset, then write the
# merged frame followed by the transposed frame, each together with its
# schema.
brand_adoption_features = dataiku.Dataset("brand_adoption_features")
Sales_marketing_transpose = dataiku.Dataset("Sales_marketing_transpose")

brand_adoption_features.write_with_schema(merged_data)
Sales_marketing_transpose.write_with_schema(transposed_data)