# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
import pyproj    
from shapely.ops import transform
import shapely.ops as ops
from shapely.geometry.polygon import Polygon
from functools import partial

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
from real_estate_pricing.geographic_handling.formating.polygons import convert_polygon_from_wkt_string_to_shapely

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
def compute_polygon_area(shapely_polygon):
    """Return the area of a polygon after reprojecting it to an equal-area CRS.

    The geometry's coordinates are interpreted as EPSG:4326 and reprojected
    to an Albers equal-area conic projection (``proj='aea'``) whose two
    standard parallels are taken from the polygon's own bounding box
    (``bounds[1]`` and ``bounds[3]``). The planar ``.area`` of the
    reprojected geometry is then returned.

    Args:
        shapely_polygon: shapely geometry exposing ``bounds``. Presumably its
            coordinates are in (longitude, latitude) order, so that
            ``bounds[1]``/``bounds[3]`` are latitudes -- TODO confirm against
            the WKT source.

    Returns:
        The ``.area`` of the reprojected geometry, in squared units of the
        target projection (presumably square metres -- TODO confirm).
    """
    # NOTE(review): `Proj(init=...)` and `pyproj.transform` are deprecated in
    # pyproj >= 2; when the code environment is upgraded, consider
    # `pyproj.Transformer.from_crs(..., always_xy=True).transform` instead.
    source_projection = pyproj.Proj(init='EPSG:4326')

    # Bounding box of the input geometry; indices 1 and 3 feed the two
    # standard parallels of the equal-area projection.
    bounding_box = shapely_polygon.bounds
    equal_area_projection = pyproj.Proj(
        proj='aea',
        lat_1=bounding_box[1],
        lat_2=bounding_box[3]
    )

    # Coordinate-wise reprojection callable handed to shapely.
    reproject = partial(
        pyproj.transform,
        source_projection,
        equal_area_projection
    )
    return ops.transform(reproject, shapely_polygon).area

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Read recipe inputs: load the "census_data" dataset as a pandas DataFrame
census_data = dataiku.Dataset("census_data")
census_data_df = census_data.get_dataframe()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# List every distinct value of the "census_polygon" column once, so that each
# polygon's area is computed a single time however many rows share it
# (np.unique also returns the values sorted).
distinct_polygons = np.unique(census_data_df["census_polygon"])
census_polygons = list(distinct_polygons)
n_census_polygons = len(census_polygons)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Maps each distinct polygon string to the area computed for it
census_polygons_areas = {}

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Convert each polygon string to a shapely geometry, compute its area and
# store it under the original string; progress is printed with a 1-based rank.
for polygon_rank, wkt_polygon in enumerate(census_polygons, start=1):
    print("Handling polygon {}/{}".format(polygon_rank, n_census_polygons))
    census_polygons_areas[wkt_polygon] = compute_polygon_area(
        convert_polygon_from_wkt_string_to_shapely(wkt_polygon, False)
    )

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Look up, for every row, the area previously computed for its polygon
# (a polygon missing from the mapping raises KeyError).
area_per_row = census_data_df["census_polygon"].apply(
    lambda wkt_polygon: census_polygons_areas[wkt_polygon]
)
census_data_df["census_polygon_area"] = area_per_row

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Rescale the areas by a factor of 1E6 (presumably square metres to square
# kilometres -- TODO confirm the unit of the computed areas).
area_before_rescaling = census_data_df["census_polygon_area"]
census_data_df["census_polygon_area"] = area_before_rescaling / 1E6

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Write recipe outputs
# The DataFrame read from "census_data", now carrying the extra
# "census_polygon_area" column, is written to the
# "census_data_polygons_areas" dataset through `write_with_schema`
# (presumably this also sets the dataset schema from the DataFrame --
# confirm in the Dataiku API documentation).
census_data_polygons_areas = dataiku.Dataset("census_data_polygons_areas")
census_data_polygons_areas.write_with_schema(census_data_df)