# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
import json
import time

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
from distribution_spatial_footprint.geographic_handling import (read_geo_point,
                                                                geo_point_is_in_geojson,
                                                                compute_geodesic_distance,
                                                                PolygonsIndexer)
from distribution_spatial_footprint.utils import (extract_isochrone_value,
                                                  preprocess_customers_for_webapp,
                                                  update_locations_customers_search_status)
from distribution_spatial_footprint.config import (WEBAPP_COLUMNS_TO_DROP,
                                                   USE_KD_TREE_INDEXING,
                                                   KD_TREE_DEFAULT_NEIGHBORS,
                                                   SUCCCESSIVE_EXCLUSIONS_STOPPING)
from dku_utils import get_current_project_and_variables

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Recipe inputs: open each input dataset, then load it as a pandas dataframe.
locations_isochrones = dataiku.Dataset("locations_isochrones")
locations_isochrones_df = locations_isochrones.get_dataframe()
customers_prepared = dataiku.Dataset("customers_prepared")
customers_prepared_df = customers_prepared.get_dataframe()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Denormalized isochrones; merged onto the inclusion results later in this recipe.
locations_isochrones_denormalized = dataiku.Dataset("locations_isochrones_denormalized")
locations_isochrones_denormalized_df = locations_isochrones_denormalized.get_dataframe()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Only the "standard" project variables are used by this recipe.
project, variables = get_current_project_and_variables()
global_variables = variables["standard"]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Isochrone types to test; each entry is later used as a column name of
# `locations_isochrones_df`.
isochrones_range_min = global_variables["isochrones_for_customers"]
n_isochrone_types = len(isochrones_range_min)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Location identifiers and their geo points are parallel lists: position i in
# one matches position i in the other (both follow the dataframe row order).
distribution_locations = list(locations_isochrones_df["location_identifier"])
n_locations = len(distribution_locations)
distribution_geo_points = list(map(read_geo_point, locations_isochrones_df["geo_point"]))
polygons_indexer = PolygonsIndexer(distribution_geo_points)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Customer identifiers, plus a lookup from customer identifier to parsed geo point.
customers = list(customers_prepared_df["customer_id"])
n_customers = len(customers)
customers_geo_points = dict(zip(customers,
                                map(read_geo_point, customers_prepared_df["geo_point"])))

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Empty result frame: one row per (location, isochrone type, included customer).
inclusion_columns = [
    "location_identifier",
    "location_id",
    "isochrone_type",
    "included_customer_id",
    "distance_customer_location",
]
df_distribution_locations_customers = pd.DataFrame(columns=inclusion_columns)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# For every isochrone type and every customer, find the distribution locations
# whose isochrone contains the customer, and record one row per inclusion with
# the customer-to-location distance.
start_time = time.time()
print("Checking the customer locations inclusion in distribution locations isochrones ...")
loop_percents = list(range(0, 100))
index_loop = 0

# The progress denominator depends on the search strategy: the KD-tree search
# handles one (isochrone type, customer) pair per step, the brute-force search
# one (isochrone type, location, customer) triple per step.
if USE_KD_TREE_INDEXING:
    n_loop_combinations = n_isochrone_types*n_customers
    kd_tree_neighbors = min(KD_TREE_DEFAULT_NEIGHBORS, n_locations)
else:
    n_loop_combinations = n_isochrone_types*n_locations*n_customers

# Inclusion rows are accumulated in a plain list and turned into a dataframe
# once, after the loops: `DataFrame.append` copies the whole frame on every
# call and was deprecated in pandas 1.4, then removed in pandas 2.0.
inclusion_rows = []

# Pre-bind the names formatted by the exception handler in brute-force mode,
# so that the handler cannot itself fail with a NameError.
location, index_location = None, -1

for index_isochrone, isochrone_type in enumerate(isochrones_range_min):
    print("Looking for isochrones of type '{}' inclusions ...".format(isochrone_type))
    # Parsed isochrones of this type, in the same order as `distribution_locations`.
    focus_isochrones = [json.loads(location_isochrone)
                        for location_isochrone in locations_isochrones_df[isochrone_type]]
    location_isochrones = dict(zip(distribution_locations, focus_isochrones))

    for index_customer, customer in enumerate(customers):
        customer_geo_point = customers_geo_points[customer]
        try:
            if USE_KD_TREE_INDEXING:
                index_loop += 1

                # The indexer returns the positions of the matching polygons;
                # those positions index `distribution_locations` and
                # `distribution_geo_points`.
                geo_point_belonging_polygons, n_polygons_found = (
                    polygons_indexer.search_geo_point_belonging_polygons(
                        customer_geo_point,
                        focus_isochrones,
                        kd_tree_neighbors,
                        SUCCCESSIVE_EXCLUSIONS_STOPPING))

                for location_id in geo_point_belonging_polygons:
                    location = distribution_locations[location_id]
                    location_geo_point = distribution_geo_points[location_id]
                    distance_customer_location = compute_geodesic_distance(
                        customer_geo_point, location_geo_point, True)
                    inclusion_rows.append(
                        {"location_identifier": location,
                         "location_id": location_id,
                         "isochrone_type": isochrone_type,
                         "included_customer_id": customer,
                         "distance_customer_location": distance_customer_location})
            else:
                # Brute force: test the customer against every location isochrone.
                for index_location, location in enumerate(distribution_locations):
                    index_loop += 1

                    location_isochrone = location_isochrones[location]
                    if not geo_point_is_in_geojson(customer_geo_point, location_isochrone):
                        continue

                    location_geo_point = distribution_geo_points[index_location]
                    distance_customer_location = compute_geodesic_distance(
                        customer_geo_point, location_geo_point, True)
                    inclusion_rows.append(
                        {"location_identifier": location,
                         "location_id": index_location,
                         "isochrone_type": isochrone_type,
                         "included_customer_id": customer,
                         "distance_customer_location": distance_customer_location})

            # Measured here so the value is always bound (even when there is no
            # location to iterate over) and reflects the work just done.
            spent_time = time.time() - start_time
            loop_percents, log_message = update_locations_customers_search_status(
                loop_percents, spent_time, index_loop, n_loop_combinations)
            if log_message is not None:
                print(log_message)

        except Exception as e:
            # Best effort: a failure on one customer is logged and the search
            # carries on with the next customer.
            log_message = "Exception : '{}' ".format(str(e))
            log_message += "Isochrone type '{}' ({}/{}) | ".format(isochrone_type, index_isochrone+1, n_isochrone_types)
            if not USE_KD_TREE_INDEXING:
                log_message += "location '{}' ({}/{}) | ".format(location, index_location+1, n_locations)
            log_message += "customer '{}' ({}/{})".format(customer, index_customer+1, n_customers)
            print(log_message)
print("Customers inclusion in isochrones | Total spent time : {} seconds.".format(time.time() - start_time))

# Build the result frame in one pass; the explicit column list keeps the
# expected columns even when no inclusion was found.
df_distribution_locations_customers = pd.DataFrame(
    inclusion_rows,
    columns=["location_identifier",
             "location_id",
             "isochrone_type",
             "included_customer_id",
             "distance_customer_location"])

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Order the inclusions by location, then by isochrone value; the helper column
# only exists for the duration of the sort.
df_distribution_locations_customers = (
    df_distribution_locations_customers
    .assign(isochrone_value=df_distribution_locations_customers["isochrone_type"]
            .apply(lambda isochrone_type: extract_isochrone_value(isochrone_type)))
    .sort_values(by=["location_identifier", "isochrone_value"])
    .drop("isochrone_value", axis=1)
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Both sides of the upcoming merge need `location_id` with the same (string) type.
for frame in (df_distribution_locations_customers, locations_isochrones_denormalized_df):
    frame["location_id"] = frame["location_id"].astype(str)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# The "isochrone_data" column is removed before the merge.
locations_isochrones_denormalized_df = locations_isochrones_denormalized_df.drop("isochrone_data", axis=1)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Attach the denormalized isochrone columns to each inclusion row.
df_distribution_locations_customers = df_distribution_locations_customers.merge(
    locations_isochrones_denormalized_df,
    on=["location_id", "isochrone_type"])

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
df_distribution_locations_customers["included_customer_id"] = (
    df_distribution_locations_customers["included_customer_id"].astype(str))

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Composite key: "<isochrone_id>_<customer id>".
isochrone_ids = df_distribution_locations_customers["isochrone_id"]
included_customer_ids = df_distribution_locations_customers["included_customer_id"]
df_distribution_locations_customers["customer_id_denormalized"] = (
    isochrone_ids + "_" + included_customer_ids)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Customer identifiers are compared as strings on both sides of the next merge.
customers_prepared_df["customer_id"] = customers_prepared_df["customer_id"].astype(str)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Bring in the customer columns; "customer_id" duplicates "included_customer_id"
# after the merge, so it is dropped.
df_distribution_locations_customers = (
    df_distribution_locations_customers
    .merge(customers_prepared_df,
           left_on="included_customer_id",
           right_on="customer_id")
    .drop("customer_id", axis=1)
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
df_distribution_locations_customers = df_distribution_locations_customers.sort_values(
    by=["location_id", "included_customer_id"])

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Write recipe outputs
locations_customers = dataiku.Dataset("locations_customers")
locations_customers.write_with_schema(df_distribution_locations_customers)