# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
from collections import Counter

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
from project_root.dku_utils.core import get_current_project_and_variables
from project_root.dku_utils.visual_ml.classification import get_deployed_model_used_threshold
from project_root.dku_utils.folders.pickles.folder_pickles import write_pickle_in_managed_folder
from project_root.dku_utils.folders.pictures.folder_pictures import (write_bytes_picture_in_managed_folder)
from project_root.dku_utils.folders.pictures.pictures_utils import (homothetic_rescale_pillow_picture,
                                                                    convert_picture_from_pillow_to_bytes,
                                                                    convert_picture_from_base64_string_or_bytes_to_np_array,
                                                                    convert_picture_from_np_array_to_pillow)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
from project_root.product_recommendations.dates_handling import (compute_difference_between_datetime_timestamps,
                                                                 from_dss_string_date_to_datetime)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
from project_root.product_recommendations.config.auto_ml.constants import RECOMMENDATION_MODELING_DEPLOYED_MODEL_ID
from project_root.product_recommendations.config.webapp.constants import WEBAPP_DEFAULT_PICTURE_BASE64_STRING

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Current project handle plus its "standard" variables, which hold the
# application settings read throughout this script.
project, variables = get_current_project_and_variables()
app_variables = variables["standard"]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Handling webapp default picture:
# The built-in base64 picture is decoded, rescaled with the configured
# `pictures_max_shape_app` and stored as a JPEG in the webapp folder.
picture_max_shape = app_variables["pictures_max_shape_app"]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
pillow_webapp_default_picture = convert_picture_from_np_array_to_pillow(
    convert_picture_from_base64_string_or_bytes_to_np_array(WEBAPP_DEFAULT_PICTURE_BASE64_STRING)
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# The picture is converted to RGB before the JPEG export
# (presumably to drop any alpha channel -- TODO confirm).
pillow_webapp_default_picture_rescaled = homothetic_rescale_pillow_picture(
    pillow_webapp_default_picture.convert("RGB"),
    picture_max_shape,
)
bytes_webapp_default_picture = convert_picture_from_pillow_to_bytes(
    pillow_webapp_default_picture_rescaled,
    "jpeg",
)
write_bytes_picture_in_managed_folder(
    project,
    "webapp_folder",
    bytes_webapp_default_picture,
    "webapp_default_picture.jpg",
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Handling batch information:
# The deployed model threshold and the batch date range are gathered in a
# single dictionary that is pickled into the webapp folder.
deployed_model_used_threshold = get_deployed_model_used_threshold(
    project,
    RECOMMENDATION_MODELING_DEPLOYED_MODEL_ID,
)

batch_start_date = app_variables["batch_start_date_app"]
batch_end_date = app_variables["batch_end_date_app"]
batch_start_date_datetime = from_dss_string_date_to_datetime(batch_start_date)
batch_end_date_datetime = from_dss_string_date_to_datetime(batch_end_date)

# Difference between the batch end and start, expressed in "days".
# NOTE(review): the two trailing positional `True` flags are options of the
# helper whose meaning is not visible from this script -- confirm in
# `dates_handling`.
batch_days = compute_difference_between_datetime_timestamps(
    batch_end_date_datetime, batch_start_date_datetime, "days", True, True
)

# The dates are stored in their original string form, not as datetimes.
batch_information = dict(
    deployed_model_threshold=deployed_model_used_threshold,
    start_date=batch_start_date,
    end_date=batch_end_date,
    batch_days=batch_days,
)
write_pickle_in_managed_folder(project, "webapp_folder", batch_information, "batch_information")

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Handling item similarities:
# Builds, for every item, the list of its similar items and the matching
# similarity scores, both ordered by decreasing similarity.
item_similarities_df = dataiku.Dataset("item_similarities").get_dataframe(infer_with_pandas=False)
for id_column in ("item_id_1", "item_id_2"):
    # Identifiers are handled as strings everywhere in this script.
    item_similarities_df[id_column] = item_similarities_df[id_column].astype(str)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Descending sort on (item, similarity): inside each item group, rows are
# ordered from the highest similarity to the lowest.
item_similarities_df = (
    item_similarities_df
    .sort_values(by=["item_id_1", "similarity"], axis=0, ascending=False)
    .reset_index(drop=True)
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Both aggregations use the same grouping key and the same descending sort on
# the unique `item` column, so their rows line up when zipped further down.
similarities_grouped_by_item = item_similarities_df.groupby("item_id_1")

item_similarities_agg_df = (
    similarities_grouped_by_item["item_id_2"].apply(list).reset_index()
    .rename(columns={"item_id_1": "item", "item_id_2": "similar_items"})
    .sort_values(by=["item"], axis=0, ascending=False)
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
item_similarities_scores_agg_df = (
    similarities_grouped_by_item["similarity"].apply(list).reset_index()
    .rename(columns={"item_id_1": "item", "similarity": "similar_item_scores"})
    .sort_values(by=["item"], axis=0, ascending=False)
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
all_items = item_similarities_agg_df["item"].tolist()
all_item_similarities = item_similarities_agg_df["similar_items"].tolist()
all_item_similarity_scores = item_similarities_scores_agg_df["similar_item_scores"].tolist()
# Unloading item_similarities_scores_agg_df:
del item_similarities_scores_agg_df

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
item_similarities_data = {
    item: {
        "similar_items": similar_items,
        "similar_item_scores": similar_item_scores,
        "n_similar_items": len(similar_items),
    }
    for item, similar_items, similar_item_scores in zip(
        all_items, all_item_similarities, all_item_similarity_scores
    )
}

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
write_pickle_in_managed_folder(project, "webapp_folder", item_similarities_data, "item_similarities_data")
write_pickle_in_managed_folder(project, "webapp_folder", all_items, "items_with_similarities")

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Handling number of recommendations per user and items:
# Loads the top interaction predictions and derives from them:
#   - the users and items, ordered by decreasing sum of `prediction`,
#   - the number of prediction rows per user and per item,
#   - for each user, the recommended items (by decreasing `proba_1`) and
#     the probability attached to each of them.

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
user_top_interaction_predictions_df = dataiku.Dataset("user_top_interaction_predictions").get_dataframe(
    columns=["user_id", "item_id", "proba_1", "prediction"],
    infer_with_pandas=False,
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Identifiers are handled as strings; `prediction` is cast to int so that it
# can be summed below.
user_top_interaction_predictions_df["user_id"] = user_top_interaction_predictions_df["user_id"].astype(str)
user_top_interaction_predictions_df["item_id"] = user_top_interaction_predictions_df["item_id"].astype(str)
user_top_interaction_predictions_df["prediction"] = user_top_interaction_predictions_df["prediction"].astype(int)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Highest probabilities first: the per-user recommendation lists built below
# inherit this ordering.
user_top_interaction_predictions_df.sort_values(by=["proba_1", "user_id"], ascending=False, inplace=True)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
user_nb_predictions_df = (
    user_top_interaction_predictions_df[["user_id", "prediction"]]
    .groupby(["user_id"]).sum()
    .sort_values(by="prediction", ascending=False)
    .reset_index()
)

item_nb_predictions_df = (
    user_top_interaction_predictions_df[["item_id", "prediction"]]
    .groupby(["item_id"]).sum()
    .sort_values(by="prediction", ascending=False)
    .reset_index()
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
users_with_recommendations = list(user_nb_predictions_df["user_id"])
items_with_recommendations = list(item_nb_predictions_df["item_id"])

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# These counters count dataset rows per user / item (whatever the value of
# `prediction`), unlike the orderings above which rely on its sum.
user_recommendation_counts = dict(Counter(user_top_interaction_predictions_df["user_id"]))
item_recommendation_counts = dict(Counter(user_top_interaction_predictions_df["item_id"]))
n_recommended_items = len(item_recommendation_counts)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
user_recommendations = {}
for user_id, item_id, proba_1 in zip(user_top_interaction_predictions_df["user_id"],
                                     user_top_interaction_predictions_df["item_id"],
                                     user_top_interaction_predictions_df["proba_1"]):
    user_entry = user_recommendations.get(user_id)
    if user_entry is None:
        # First row seen for this user: create its entry.
        # "webapp_gauge_colors" is only initialised here; nothing in this
        # script fills it.
        user_entry = user_recommendations[user_id] = {
            "recommendations": [],
            "recommendation_probas": {},
            "webapp_gauge_colors": {},
        }
    user_entry["recommendations"].append(item_id)
    user_entry["recommendation_probas"][item_id] = proba_1

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Unloading user_top_interaction_predictions_df:
del user_top_interaction_predictions_df

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Per-item recommendation counts, ranked from the most to the least
# recommended item.
item_recommendations_counts_data = [
    {"item_id": item_id, "n_recommendations": item_recommendation_counts[item_id]}
    for item_id in items_with_recommendations
]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Columns are given explicitly so that the frame keeps its schema (and the
# sort below does not fail) when there is no recommended item at all.
item_recommendation_counts_df = pd.DataFrame(item_recommendations_counts_data,
                                             columns=["item_id", "n_recommendations"])
item_recommendation_counts_df.sort_values(by="n_recommendations", ascending=False, inplace=True)
# Rank 1 is the most recommended item (the list is assigned positionally,
# following the sorted row order).
item_recommendation_counts_df["recommendation_rank"] = list(range(1, n_recommended_items + 1))

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Per-user payload: recommendation count plus the lists / mappings built when
# scanning the predictions.
user_recommendations_data = {
    user_id: {
        "n_recommendations": user_recommendation_counts[user_id],
        "recommendations": user_recommendations[user_id]["recommendations"],
        "recommendation_probas": user_recommendations[user_id]["recommendation_probas"],
        "webapp_gauge_colors": user_recommendations[user_id]["webapp_gauge_colors"],
    }
    for user_id in users_with_recommendations
}

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
item_recommendations_data = {}
recommendation_rank_item_mapping = {}
for _, row in item_recommendation_counts_df.iterrows():
    item_id = row["item_id"]
    recommendation_rank = row["recommendation_rank"]
    # Keyed by the item identifier itself. The previous code used the literal
    # string "item_id" as key, so every item overwrote the same single entry.
    item_recommendations_data[item_id] = {"n_recommendations": row["n_recommendations"],
                                          "recommendation_rank": recommendation_rank}
    recommendation_rank_item_mapping[recommendation_rank] = item_id

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
write_pickle_in_managed_folder(project, "webapp_folder", users_with_recommendations, "users_with_recommendations")
write_pickle_in_managed_folder(project, "webapp_folder", items_with_recommendations, "items_with_recommendations")
write_pickle_in_managed_folder(project, "webapp_folder", user_recommendations_data, "user_recommendations_data")
write_pickle_in_managed_folder(project, "webapp_folder", item_recommendations_data, "item_recommendations_data")
write_pickle_in_managed_folder(project, "webapp_folder", recommendation_rank_item_mapping, "recommendation_rank_item_mapping")

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Handling user past interactions:
# Loads the interaction history, keeps only the users that also have
# recommendations, then aggregates and counts their interactions.
user_item_interactions_df = dataiku.Dataset("user_item_interactions").get_dataframe(
    columns=["user_id", "item_id"],
    infer_with_pandas=False,
)
for id_column in ("user_id", "item_id"):
    # Identifiers are handled as strings everywhere in this script.
    user_item_interactions_df[id_column] = user_item_interactions_df[id_column].astype(str)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
users_in_interactions_history = list(set(user_item_interactions_df["user_id"]))
users_in_interactions_history_with_recommendations = list(
    set(users_in_interactions_history) & set(users_with_recommendations)
)

# Filtering user_item_interactions_df on users having recommendations:
rows_of_users_with_recommendations = user_item_interactions_df["user_id"].isin(
    users_in_interactions_history_with_recommendations
)
user_item_interactions_df = user_item_interactions_df[rows_of_users_with_recommendations]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# One row per user, holding the list of every item the user interacted with
# (an item appears once per interaction row).
user_past_interactions_agg_df = (
    user_item_interactions_df.groupby("user_id")["item_id"].apply(list).reset_index()
    .rename(columns={"item_id": "historical_interactions"})
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Number of interaction rows per user and per item, on the filtered history.
user_interactions_counts = dict(Counter(user_item_interactions_df["user_id"]))
item_interactions_counts = dict(Counter(user_item_interactions_df["item_id"]))

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Unloading user_item_interactions_df:
del user_item_interactions_df

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Ranks users and items by decreasing number of interactions and builds the
# lookups that are pickled at the end of this section.
user_interactions_records = [
    {"user_id": user_id, "n_interactions": n_user_interactions}
    for user_id, n_user_interactions in user_interactions_counts.items()
]
item_interactions_data = [
    {"item_id": item_id, "n_interactions": n_item_interactions}
    for item_id, n_item_interactions in item_interactions_counts.items()
]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Rank 1 is the user with the most interactions.
user_interactions_df = (
    pd.DataFrame(user_interactions_records)
    .sort_values(by=["n_interactions"], ascending=False)
    .reset_index(drop=True)
)
user_interactions_df["interaction_rank"] = list(range(1, len(user_interactions_df) + 1))

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Attach to each user the list of items from its interaction history.
user_interactions_df = user_interactions_df.merge(user_past_interactions_agg_df, how="left", on=["user_id"])

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
user_interactions_data = {}
interaction_rank_user_mapping = {}

for _, user_row in user_interactions_df.iterrows():
    user_id = user_row["user_id"]
    interaction_rank = user_row["interaction_rank"]
    historical_interactions = user_row["historical_interactions"]
    user_interactions_data[user_id] = {
        # De-duplicated items; how often each one occurs is kept in the counts.
        "historical_interactions": list(np.unique(historical_interactions)),
        "historical_interactions_counts": dict(Counter(historical_interactions)),
        "n_interactions": user_row["n_interactions"],
        "interaction_rank": interaction_rank,
    }
    interaction_rank_user_mapping[interaction_rank] = user_id

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Same ranking for items: rank 1 is the item with the most interactions.
item_interactions_df = (
    pd.DataFrame(item_interactions_data)
    .sort_values(by=["n_interactions"], ascending=False)
    .reset_index(drop=True)
)
item_interactions_df["interaction_rank"] = list(range(1, len(item_interactions_df) + 1))
interaction_rank_item_mapping = {
    item_row["interaction_rank"]: item_row["item_id"]
    for _, item_row in item_interactions_df.iterrows()
}

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
write_pickle_in_managed_folder(project, "webapp_folder", user_interactions_data, "user_interactions_data")
write_pickle_in_managed_folder(project, "webapp_folder", interaction_rank_user_mapping, "interaction_rank_user_mapping")
write_pickle_in_managed_folder(project, "webapp_folder", interaction_rank_item_mapping, "interaction_rank_item_mapping")

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Handling item pictures and metadata:
# Two application flags drive this section: one for the item pictures mapping,
# one for the item metadata mapping. The prepared item metadata dataset is
# read only when at least one of them is set.
leverage_item_metadata = app_variables["leverage_item_metadata_app"]
use_product_pictures = app_variables["use_product_pictures_app"]

# Values kept as-is when item metadata is not leveraged.
item_metadata_columns = []
item_metadata_mapping = {}

if leverage_item_metadata or use_product_pictures:
    item_metadata_df = dataiku.Dataset("item_metadata_prepared").get_dataframe(infer_with_pandas=False)
    item_metadata_df["item_id"] = item_metadata_df["item_id"].astype(str)

    if use_product_pictures:
        # item_id -> content of the "item_picture" column.
        item_pictures_mapping = dict(zip(item_metadata_df["item_id"], item_metadata_df["item_picture"]))
        write_pickle_in_managed_folder(project, "webapp_folder", item_pictures_mapping, "item_pictures_mapping")

    if leverage_item_metadata:
        # The picture column is not part of the metadata exposed per item.
        columns_without_picture = [column for column in item_metadata_df.columns if column != "item_picture"]
        item_metadata_df = item_metadata_df[columns_without_picture]
        item_metadata_columns = [column for column in item_metadata_df.columns if column != "item_id"]

        # Column values are materialised once as plain lists, then read by
        # row position to build one {column: value} dictionary per item.
        all_item_metadata_information = {
            column_name: list(item_metadata_df[column_name]) for column_name in item_metadata_columns
        }
        item_metadata_mapping = {
            item_id: {
                column_name: all_item_metadata_information[column_name][item_index]
                for column_name in item_metadata_columns
            }
            for item_index, item_id in enumerate(item_metadata_df["item_id"])
        }

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Handling user metadata:
# When the application flag is set, builds one {column: value} dictionary per
# user from the prepared user metadata dataset; otherwise empty structures are
# written.
leverage_user_metadata = app_variables["leverage_user_metadata_app"]

# Values kept as-is when user metadata is not leveraged.
user_metadata_columns = []
user_metadata_mapping = {}

if leverage_user_metadata:
    user_metadata_df = dataiku.Dataset("user_metadata_prepared").get_dataframe(infer_with_pandas=False)
    user_metadata_df["user_id"] = user_metadata_df["user_id"].astype(str)

    user_metadata_columns = [column for column in user_metadata_df.columns if column != "user_id"]

    # Column values are materialised once as plain lists, then read by row
    # position to assemble each user's dictionary.
    all_user_metadata_information = {
        column_name: list(user_metadata_df[column_name]) for column_name in user_metadata_columns
    }
    user_metadata_mapping = {
        user_id: {
            column_name: all_user_metadata_information[column_name][user_index]
            for column_name in user_metadata_columns
        }
        for user_index, user_id in enumerate(user_metadata_df["user_id"])
    }

# User and item metadata structures are pickled together at the end.
for pickled_object, pickle_name in (
    (user_metadata_columns, "user_metadata_columns"),
    (user_metadata_mapping, "user_metadata_mapping"),
    (item_metadata_columns, "item_metadata_columns"),
    (item_metadata_mapping, "item_metadata_mapping"),
):
    write_pickle_in_managed_folder(project, "webapp_folder", pickled_object, pickle_name)