# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
import re
import json

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
from market_basket_analysis.association_rules import (compute_association_rules,
                                                      get_association_rules_datasets_schemas)
from market_basket_analysis.dku_utils import (get_current_project_and_variables,
                                              clear_dataset)
from market_basket_analysis.utils import melt_dataframe
from market_basket_analysis.config.flow.constants import (DISTINCT_ITEMSETS_COLUMNS,
                                                          ASSOCIATION_RULES_COLUMNS,
                                                          ASSOCIATION_RULES_RECIPE_OUTPUT_DATASETS)
from market_basket_analysis.config.flow.connections import ALLOWED_CLOUD_PROVIDERS_FILESYSTEM_STORAGES

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Handle on the current project together with its variables, as returned by the
# project helper.
project, variables = get_current_project_and_variables()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Recipe settings, all read from the "standard" section of the project variables.
standard_variables = variables["standard"]

# Mandatory settings: a missing key raises KeyError right here.
max_itemsets_size = standard_variables["max_frequent_itemsets_size_app"]
itemsets_min_support = standard_variables["itemsets_min_frequency_app"]
confidence_threshold = standard_variables["rules_confidence_threshold_app"]
connection_type = standard_variables["connection_type_app"]

# Optional flag: absent (None) or falsy means rules are computed once on the
# whole set of transactions instead of once per rule scope.
compute_refined_rules = standard_variables.get("compute_refined_rules_app")

if compute_refined_rules:
    # Columns whose value combinations define the rule scopes. Further down the
    # script this value is used to select dataframe columns and is concatenated
    # with lists, so it has to be a non-empty list of column names.
    association_rules_scope = standard_variables.get("association_rules_scope_app")
    if not association_rules_scope:
        # Fail early with an explicit message rather than with an obscure
        # pandas error once the recipe outputs have already been cleared.
        raise ValueError(
            "Project variable 'association_rules_scope_app' must list at least one column "
            "when 'compute_refined_rules_app' is enabled."
        )

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Recipe input: the preprocessed transactions, loaded as a pandas dataframe.
# `infer_with_pandas=False` asks Dataiku to type the columns from the dataset
# schema rather than letting pandas infer them (per the Dataiku API docs).
transactions_preprocessed = dataiku.Dataset("transactions_preprocessed")
transactions_preprocessed_df = transactions_preprocessed.get_dataframe(
    infer_with_pandas=False
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Recipe outputs: one Dataiku dataset handle per output, created in the order
# in which the names are listed.
(association_rules,
 distinct_itemsets,
 rules_denormalized_antecedents,
 rules_denormalized_consequents,
 association_rules_summary) = map(dataiku.Dataset, (
    "association_rules",
    "distinct_itemsets",
    "rules_denormalized_antecedents",
    "rules_denormalized_consequents",
    "association_rules_summary",
))

# Every output dataset declared in the flow constants is cleared before
# anything is written.
for dataset_name in ASSOCIATION_RULES_RECIPE_OUTPUT_DATASETS:
    clear_dataset(project, dataset_name)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Turn the serialized "transaction_items" column into one collection of items
# per transaction. The serialization format depends on the connection type.

# Both values below are loop-invariant, so they are computed once up front.
is_filesystem_storage = connection_type in ALLOWED_CLOUD_PROVIDERS_FILESYSTEM_STORAGES
# Raw string: the previous non-raw literal relied on the invalid escape
# sequences "\[" and "\]", which newer Python versions warn about.
items_wrapping_chars = re.compile(r'"|\[|\]')

loaded_transactions_items = []
for items_string in transactions_preprocessed_df["transaction_items"]:
    if is_filesystem_storage:
        # Drop double quotes and square brackets, then split on the literal
        # three-character separator /|\ .
        # NOTE(review): presumably the separator written by the upstream
        # preprocessing step for these connections -- confirm. This branch keeps
        # duplicated items and yields a plain list, whereas the JSON branch
        # yields a sorted, de-duplicated NumPy array.
        items_string = items_wrapping_chars.sub('', items_string)
        items_list = items_string.split("/|\\")
    else:
        # The cell holds a JSON array: parse it and keep each item once.
        items_list = np.unique(json.loads(items_string))
    loaded_transactions_items.append(items_list)

transactions_preprocessed_df["transaction_items"] = loaded_transactions_items

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
if compute_refined_rules:
    # Refined mode: association rules are computed separately for every distinct
    # combination of values found in the `association_rules_scope` columns.

    # Computing the different rule scopes (one tuple of scope values per row):
    transactions_preprocessed_df["rule_scope"] = \
        transactions_preprocessed_df[association_rules_scope].apply(tuple, axis=1)
    rule_scope_tuples = np.unique(transactions_preprocessed_df["rule_scope"])
    n_rule_scopes = len(rule_scope_tuples)
    print("{} rule scopes found : '{}' ".format(n_rule_scopes, rule_scope_tuples))

    # Connecting recipe output datasets writers. They are opened as context
    # managers so that each of them is closed even when the processing of a
    # scope raises, instead of being left open.
    with association_rules.get_writer() as association_rules_writer, \
            distinct_itemsets.get_writer() as distinct_itemsets_writer, \
            rules_denormalized_antecedents.get_writer() as rules_denormalized_antecedents_writer, \
            rules_denormalized_consequents.get_writer() as rules_denormalized_consequents_writer, \
            association_rules_summary.get_writer() as association_rules_summary_writer:

        # Iterating over the different rule scopes
        for loop_index, rule_scope_tuple in enumerate(rule_scope_tuples):
            rule_scope_str = "_".join(str(elem) for elem in rule_scope_tuple)
            # Progress suffix shared by the log messages of this iteration.
            scope_progress = "{} (n°{}/{})".format(rule_scope_str, loop_index+1, n_rule_scopes)
            granular_df = transactions_preprocessed_df[
                transactions_preprocessed_df["rule_scope"] == rule_scope_tuple
            ]

            print("Computing association rules on scope : {}".format(scope_progress))
            (association_rules_found, df_association_rules, n_rules_found,
             df_association_rules_summary, df_itemsets_supports) = \
                compute_association_rules(granular_df, "transaction_items", itemsets_min_support,
                                          max_itemsets_size,
                                          confidence_threshold,
                                          rule_scope=rule_scope_str)

            print("Passing association rules data to the writer...")
            if association_rules_found:
                # Tag rules and itemsets with the scope values they belong to.
                # Done only when rules were found: the dataframes are not used
                # otherwise, and their content in that case is not guaranteed
                # by anything visible in this script.
                for column_index, column in enumerate(association_rules_scope):
                    df_association_rules[column] = rule_scope_tuple[column_index]
                    df_itemsets_supports[column] = rule_scope_tuple[column_index]

                df_association_rules = df_association_rules[
                    association_rules_scope + ["rule_id", "rule_scope"] + ASSOCIATION_RULES_COLUMNS
                ]
                association_rules_writer.write_dataframe(df_association_rules)

            print("Passing frequent itemsets data to the writer [scope : {}]...".format(scope_progress))
            if not association_rules_found:
                # Nothing to write for this scope.
                continue

            df_itemsets_supports = df_itemsets_supports[
                association_rules_scope + ["rule_scope"] + DISTINCT_ITEMSETS_COLUMNS
            ]
            distinct_itemsets_writer.write_dataframe(df_itemsets_supports)
            del df_itemsets_supports

            print("Indexing antecedent items/rules ...")
            df_rules_denormalized_antecedents = melt_dataframe(
                df_association_rules, "rule_antecedent", {"rule_antecedent": "antecedent_item"})

            print("Passing antecedent items/rules data to the writer [scope : {}]...".format(scope_progress))
            rules_denormalized_antecedents_writer.write_dataframe(df_rules_denormalized_antecedents)
            del df_rules_denormalized_antecedents

            print("Indexing consequent items/rules data to the writer [scope : {}]...".format(scope_progress))
            df_rules_denormalized_consequents = melt_dataframe(
                df_association_rules, "rule_consequent", {"rule_consequent": "consequent_item"})

            print("Passing consequent items/rules data to the writer [scope : {}]...".format(scope_progress))
            rules_denormalized_consequents_writer.write_dataframe(df_rules_denormalized_consequents)

            print("Passing association rules summary data to the writer [scope : {}]...".format(scope_progress))
            association_rules_summary_writer.write_dataframe(df_association_rules_summary)

else:
    # Global mode: a single computation over all the transactions.

    print("Looking for frequent itemsets ...")
    (association_rules_found, df_association_rules, n_rules_found,
     df_association_rules_summary, df_itemsets_supports) = \
        compute_association_rules(transactions_preprocessed_df,
                                  "transaction_items",
                                  itemsets_min_support,
                                  max_itemsets_size,
                                  confidence_threshold)

    if association_rules_found:
        # The column projection is kept inside the guard, like in the refined
        # branch, so that a run without any rule does not depend on the shape
        # of the returned dataframe.
        df_association_rules = df_association_rules[["rule_id"] + ASSOCIATION_RULES_COLUMNS]

        print("Writing association rules data ...")
        association_rules.write_with_schema(df_association_rules, dropAndCreate=False)

        print("Writing frequent itemsets data ...")
        distinct_itemsets.write_with_schema(df_itemsets_supports, dropAndCreate=False)

        del df_itemsets_supports

        print("Indexing antecedent items/rules ...")
        df_rules_denormalized_antecedents = melt_dataframe(
            df_association_rules, "rule_antecedent", {"rule_antecedent": "antecedent_item"})

        print("Writing antecedent items/rules ...")
        rules_denormalized_antecedents.write_with_schema(df_rules_denormalized_antecedents, dropAndCreate=False)

        print("Indexing consequent items/rules ...")
        df_rules_denormalized_consequents = melt_dataframe(
            df_association_rules, "rule_consequent", {"rule_consequent": "consequent_item"})

        print("Writing consequent items/rules ...")
        rules_denormalized_consequents.write_with_schema(df_rules_denormalized_consequents, dropAndCreate=False)

        print("Writing association rules summary ...")
        association_rules_summary.write_with_schema(df_association_rules_summary, dropAndCreate=False)