# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
def process_sequential_data_for_static_sankey_chart(sequential_state_labels,
                                                    sequential_state_weights,
                                                    bool_enrich_with_scaled_weights=False,
                                                    scaling_factor=1.0):
    """Turn a chain of states into the link table of a static sankey chart.

    Every pair of consecutive labels yields one row: the link goes from
    ``sequential_state_labels[i]`` to ``sequential_state_labels[i + 1]`` and
    carries the weight of the *destination* state,
    ``sequential_state_weights[i + 1]``. The weight of the first state is
    therefore never used.

    Args:
        sequential_state_labels: Indexable sequence of state labels.
        sequential_state_weights: Indexable sequence of weights, aligned with
            the labels by position.
        bool_enrich_with_scaled_weights: When truthy, add a ``weight_scaled``
            column equal to ``100 * weight / scaling_factor``.
        scaling_factor: Denominator used for ``weight_scaled``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``origin``, ``destination``,
        ``weight`` and, when requested, ``weight_scaled``. The columns are
        present even when there are fewer than two labels (no link to emit).

    Raises:
        IndexError: If a list of weights is shorter than the list of labels.
    """
    # Fix the column layout up front so that an empty result still exposes the
    # expected columns to code that selects them by name.
    output_columns = ["origin", "destination", "weight"]
    if bool_enrich_with_scaled_weights:
        output_columns.append("weight_scaled")

    static_sankey_data = []
    # One link per consecutive (origin, destination) pair of states.
    for origin_index in range(len(sequential_state_labels) - 1):
        destination_index = origin_index + 1
        destination_weight = sequential_state_weights[destination_index]
        static_sankey_record = {
            "origin": sequential_state_labels[origin_index],
            "destination": sequential_state_labels[destination_index],
            "weight": destination_weight,
        }
        if bool_enrich_with_scaled_weights:
            static_sankey_record["weight_scaled"] = 100 * destination_weight / scaling_factor
        static_sankey_data.append(static_sankey_record)

    return pd.DataFrame(static_sankey_data, columns=output_columns)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
STATIC_SANKEY_CHARTS_STYLE = 'classic_sankey' # Choose between ['aligned_with_flow_dependencies', 'classic_sankey']


def _load_dataset_with_string_ids(dataset_name, id_column, **get_dataframe_options):
    """Read a recipe input dataset and cast its identifier column to ``str``.

    ``get_dataframe_options`` are forwarded unchanged to
    ``dataiku.Dataset.get_dataframe``.
    """
    loaded_df = dataiku.Dataset(dataset_name).get_dataframe(**get_dataframe_options)
    loaded_df[id_column] = loaded_df[id_column].astype(str)
    return loaded_df


# Read recipe inputs

# User dataframes:
all_interactions_users_df = _load_dataset_with_string_ids(
    "all_interactions_users", "user_id", infer_with_pandas=False)
collaborative_filtering_set_users_df = _load_dataset_with_string_ids(
    "collaborative_filtering_set_users", "user_id", infer_with_pandas=False)
machine_learning_set_users_df = _load_dataset_with_string_ids(
    "machine_learning_set_users", "user_id", infer_with_pandas=False)
in_recommendation_users_df = _load_dataset_with_string_ids(
    "in_recommendation_users", "user_id", infer_with_pandas=False)
# The recommendation count is compared against 0 later on, so force it to int.
in_recommendation_users_df["n_confident_recommendations"] = (
    in_recommendation_users_df["n_confident_recommendations"].astype(int))

# Item dataframes:
# NOTE(review): this is the only input read without infer_with_pandas=False;
# confirm whether that difference is intentional.
all_interactions_items_df = _load_dataset_with_string_ids(
    "all_interactions_items", "item_id")
collaborative_filtering_set_items_df = _load_dataset_with_string_ids(
    "collaborative_filtering_set_items", "item_id", infer_with_pandas=False)
machine_learning_set_items_df = _load_dataset_with_string_ids(
    "machine_learning_set_items", "item_id", infer_with_pandas=False)
in_recommendation_items_df = _load_dataset_with_string_ids(
    "in_recommendation_items", "item_id", infer_with_pandas=False)
# Same treatment as for users: the count is compared against 0 later on.
in_recommendation_items_df["n_times_recommended"] = (
    in_recommendation_items_df["n_times_recommended"].astype(int))
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# User id lists (plain lists: any duplicated id is kept as-is):
all_interaction_users = all_interactions_users_df["user_id"].tolist()
collaborative_filtering_set_users = collaborative_filtering_set_users_df["user_id"].tolist()
machine_learning_set_users = machine_learning_set_users_df["user_id"].tolist()
recommendation_set_users = in_recommendation_users_df["user_id"].tolist()
# Users having at least one confident recommendation.
users_with_confident_recommendations_mask = in_recommendation_users_df["n_confident_recommendations"] > 0
with_confident_recommendations_set_users = in_recommendation_users_df.loc[
    users_with_confident_recommendations_mask, "user_id"].tolist()

# Item id lists (plain lists: any duplicated id is kept as-is):
all_interaction_items = all_interactions_items_df["item_id"].tolist()
collaborative_filtering_set_items = collaborative_filtering_set_items_df["item_id"].tolist()
machine_learning_set_items = machine_learning_set_items_df["item_id"].tolist()
recommendation_set_items = in_recommendation_items_df["item_id"].tolist()
# Items that were recommended at least once.
items_recommended_at_least_once_mask = in_recommendation_items_df["n_times_recommended"] > 0
with_confident_recommendations_set_items = in_recommendation_items_df.loc[
    items_recommended_at_least_once_mask, "item_id"].tolist()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# User counts.
# NOTE(review): the plain counts are collection lengths while the "cf and ml"
# count is taken over distinct ids; they only agree if ids are unique - confirm.
n_total_users = len(all_interaction_users)
n_users_in_cf = len(collaborative_filtering_set_users)
n_users_in_ml = len(machine_learning_set_users)
# Users present in both the collaborative filtering and machine learning sets.
n_users_in_cf_and_ml = len(set(collaborative_filtering_set_users) & set(machine_learning_set_users))
n_users_with_recommendations = len(recommendation_set_users)
n_users_with_confident_recommendations = len(with_confident_recommendations_set_users)

# Item counts (same conventions as for users).
n_total_items = len(all_interaction_items)
n_items_in_cf = len(collaborative_filtering_set_items)
n_items_in_ml = len(machine_learning_set_items)
# Items present in both the collaborative filtering and machine learning sets.
n_items_in_cf_and_ml = len(set(collaborative_filtering_set_items) & set(machine_learning_set_items))
n_items_with_recommendations = len(recommendation_set_items)
n_items_with_confident_recommendations = len(with_confident_recommendations_set_items)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Build the sankey link tables for users and for items.
# Every table is enriched with "weight_scaled", the link weight expressed as a
# percentage of the total number of users (user tables) or items (item tables).
if STATIC_SANKEY_CHARTS_STYLE == 'aligned_with_flow_dependencies':
    # Single chain of states: each state links to the next one.
    # Users sankey data:
    user_set_labels = ["all", "all'", "in_cf", "in_ml", "in_cf_&_ml", "with_recos", "with_strong_recos"]
    user_set_numbers = [n_total_users, n_total_users, n_users_in_cf, n_users_in_ml, n_users_in_cf_and_ml,
                        n_users_with_recommendations, n_users_with_confident_recommendations]
    users_recommendation_pipeline_df = process_sequential_data_for_static_sankey_chart(
        user_set_labels,
        user_set_numbers,
        bool_enrich_with_scaled_weights=True,
        scaling_factor=n_total_users)
    # Items sankey data:
    item_set_labels = ["all", "all'", "in_cf", "in_ml", "in_cf_&_ml", "with_recos", "with_strong_recos"]
    item_set_numbers = [n_total_items, n_total_items, n_items_in_cf, n_items_in_ml, n_items_in_cf_and_ml,
                        n_items_with_recommendations, n_items_with_confident_recommendations]
    # Item weights are scaled by the item total, exactly as in the other style.
    items_recommendation_pipeline_df = process_sequential_data_for_static_sankey_chart(
        item_set_labels,
        item_set_numbers,
        bool_enrich_with_scaled_weights=True,
        scaling_factor=n_total_items)

else:
    # Branching layout: several short chains are built separately and then
    # stacked. pd.concat keeps the row index of each partial table.
    # Users sankey data:
    # all -> all' -> in_cf
    user_set_labels_1 = ["all", "all'", "in_cf"]
    user_set_numbers_1 = [n_total_users, n_total_users, n_users_in_cf]
    user_static_sankey_chart_dataframe_1 = process_sequential_data_for_static_sankey_chart(
        user_set_labels_1,
        user_set_numbers_1,
        bool_enrich_with_scaled_weights=True,
        scaling_factor=n_total_users)
    # all' -> in_ml
    user_set_labels_2 = ["all'", "in_ml"]
    user_set_numbers_2 = [n_total_users, n_users_in_ml]
    user_static_sankey_chart_dataframe_2 = process_sequential_data_for_static_sankey_chart(
        user_set_labels_2,
        user_set_numbers_2,
        bool_enrich_with_scaled_weights=True,
        scaling_factor=n_total_users)

    # in_ml -> in_cf_&_ml
    user_set_labels_3 = ["in_ml", "in_cf_&_ml"]
    user_set_numbers_3 = [n_users_in_ml, n_users_in_cf_and_ml]
    user_static_sankey_chart_dataframe_3 = process_sequential_data_for_static_sankey_chart(
        user_set_labels_3,
        user_set_numbers_3,
        bool_enrich_with_scaled_weights=True,
        scaling_factor=n_total_users)
    # in_cf -> in_cf_&_ml
    user_set_labels_4 = ["in_cf", "in_cf_&_ml"]
    user_set_numbers_4 = [n_users_in_cf, n_users_in_cf_and_ml]
    user_static_sankey_chart_dataframe_4 = process_sequential_data_for_static_sankey_chart(
        user_set_labels_4,
        user_set_numbers_4,
        bool_enrich_with_scaled_weights=True,
        scaling_factor=n_total_users)

    # all' -> with_recos -> with_strong_recos
    user_set_labels_scoring = ["all'", "with_recos", "with_strong_recos"]
    user_set_numbers_scoring = [n_total_users,  n_users_with_recommendations, n_users_with_confident_recommendations]
    user_static_sankey_chart_dataframe_scoring = process_sequential_data_for_static_sankey_chart(
        user_set_labels_scoring,
        user_set_numbers_scoring,
        bool_enrich_with_scaled_weights=True,
        scaling_factor=n_total_users)
    users_recommendation_pipeline_df = pd.concat([user_static_sankey_chart_dataframe_1,
                                                  user_static_sankey_chart_dataframe_2,
                                                  user_static_sankey_chart_dataframe_3,
                                                  user_static_sankey_chart_dataframe_4,
                                                  user_static_sankey_chart_dataframe_scoring])

    # Items sankey data:
    # all -> all' -> in_cf
    item_set_labels_1 = ["all", "all'", "in_cf"]
    item_set_numbers_1 = [n_total_items, n_total_items, n_items_in_cf]
    item_static_sankey_chart_dataframe_1 = process_sequential_data_for_static_sankey_chart(
        item_set_labels_1,
        item_set_numbers_1,
        bool_enrich_with_scaled_weights=True,
        scaling_factor=n_total_items)
    # all' -> in_ml
    item_set_labels_2 = ["all'", "in_ml"]
    item_set_numbers_2 = [n_total_items, n_items_in_ml]
    item_static_sankey_chart_dataframe_2 = process_sequential_data_for_static_sankey_chart(
        item_set_labels_2,
        item_set_numbers_2,
        bool_enrich_with_scaled_weights=True,
        scaling_factor=n_total_items)

    # in_ml -> in_cf_&_ml
    item_set_labels_3 = ["in_ml", "in_cf_&_ml"]
    item_set_numbers_3 = [n_items_in_ml, n_items_in_cf_and_ml]
    item_static_sankey_chart_dataframe_3 = process_sequential_data_for_static_sankey_chart(
        item_set_labels_3,
        item_set_numbers_3,
        bool_enrich_with_scaled_weights=True,
        scaling_factor=n_total_items)
    # in_cf -> in_cf_&_ml
    item_set_labels_4 = ["in_cf", "in_cf_&_ml"]
    item_set_numbers_4 = [n_items_in_cf, n_items_in_cf_and_ml]
    item_static_sankey_chart_dataframe_4 = process_sequential_data_for_static_sankey_chart(
        item_set_labels_4,
        item_set_numbers_4,
        bool_enrich_with_scaled_weights=True,
        scaling_factor=n_total_items)

    # all' -> with_recos -> with_strong_recos
    item_set_labels_scoring = ["all'", "with_recos", "with_strong_recos"]
    item_set_numbers_scoring = [n_total_items,  n_items_with_recommendations, n_items_with_confident_recommendations]
    item_static_sankey_chart_dataframe_scoring = process_sequential_data_for_static_sankey_chart(
        item_set_labels_scoring,
        item_set_numbers_scoring,
        bool_enrich_with_scaled_weights=True,
        scaling_factor=n_total_items)
    items_recommendation_pipeline_df = pd.concat([item_static_sankey_chart_dataframe_1,
                                                  item_static_sankey_chart_dataframe_2,
                                                  item_static_sankey_chart_dataframe_3,
                                                  item_static_sankey_chart_dataframe_4,
                                                  item_static_sankey_chart_dataframe_scoring])

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Keep only the sankey columns, in a fixed order, for both output tables.
sankey_output_columns = ["origin", "destination", "weight", "weight_scaled"]
users_recommendation_pipeline_df = users_recommendation_pipeline_df.loc[:, sankey_output_columns]
items_recommendation_pipeline_df = items_recommendation_pipeline_df.loc[:, sankey_output_columns]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Write recipe outputs
# Handles on the two output datasets.
users_recommendation_pipeline = dataiku.Dataset("users_recommendation_pipeline")
items_recommendation_pipeline = dataiku.Dataset("items_recommendation_pipeline")

# Write the users table first, then the items table.
users_recommendation_pipeline.write_with_schema(users_recommendation_pipeline_df)
items_recommendation_pipeline.write_with_schema(items_recommendation_pipeline_df)