# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu
from dku_utils import get_current_project_and_variables

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
from dku_utils import get_current_project_and_variables

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Recipe inputs: create a handle for each input dataset, then load both of
# them into DataFrames.
locations_customers_agg = dataiku.Dataset("locations_customers_agg")
customers_prepared = dataiku.Dataset("customers_prepared")
locations_customers_agg_df = locations_customers_agg.get_dataframe()
customers_prepared_df = customers_prepared.get_dataframe()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Look up the current project and its variables through the dku_utils helper,
# then read the "isochrones_for_customers" entry of the "standard" variables.
# NOTE(review): neither `project` nor `isochrones_range_min` is referenced by
# the rest of the visible recipe -- confirm whether this lookup is still needed.
project, variables = get_current_project_and_variables()
global_variables = variables["standard"]
isochrones_range_min = global_variables["isochrones_for_customers"]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Sum "in_isochrone_customers" per location: one row per
# "location_identifier", with the identifier restored as a regular column.
agg_customers = (
    locations_customers_agg_df
    .groupby("location_identifier")[["in_isochrone_customers"]]
    .sum()
    .reset_index()
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Give the per-location total its own column name so it does not clash with
# the per-row "in_isochrone_customers" column.
agg_customers = agg_customers.rename(
    columns={"in_isochrone_customers": "all_isochrones_customers"}
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Attach each location's total to every one of its rows; the left join keeps
# all rows of the detailed table.
locations_customers_agg_df = pd.merge(
    locations_customers_agg_df,
    agg_customers,
    how="left",
    on="location_identifier",
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Fraction of the location's summed customers that this row accounts for.
locations_customers_agg_df["in_isochrone_customers_ratio"] = (
    locations_customers_agg_df["in_isochrone_customers"]
    .div(locations_customers_agg_df["all_isochrones_customers"])
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Row count of the prepared customers dataset, repeated on every row so it
# can serve as a denominator for the ratios computed afterwards.
total_dataset_customers = customers_prepared_df.shape[0]
locations_customers_agg_df["total_dataset_customers"] = total_dataset_customers

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# This row's customer count as a fraction of the whole customers dataset.
locations_customers_agg_df["in_isochrone_customers_vs_total_dataset_customers_ratio"] = (
    locations_customers_agg_df["in_isochrone_customers"]
    .div(locations_customers_agg_df["total_dataset_customers"])
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# The location's summed customer count as a fraction of the whole customers
# dataset.
locations_customers_agg_df["all_isochrone_customers_vs_total_dataset_customers_ratio"] = (
    locations_customers_agg_df["all_isochrones_customers"]
    .div(locations_customers_agg_df["total_dataset_customers"])
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Write recipe outputs: the enriched DataFrame (input columns plus the
# per-location total, the dataset total and the three ratio columns) goes to
# the "locations_customers_agg_enriched" dataset via write_with_schema.
locations_customers_agg_enriched = dataiku.Dataset("locations_customers_agg_enriched")
locations_customers_agg_enriched.write_with_schema(locations_customers_agg_df)