# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu
from shapely.geometry import mapping
from shapely import wkt
import json
from collections import defaultdict
import topojson as tp
from geojson_rewind import rewind

# Recipe inputs: tract metadata plus the tract and county geometry datasets.
# The variables are bound in the same order as the dataset names listed below.
tracts_metadata, tl_2020_01_tract, tl_2020_us_county = map(
    dataiku.Dataset,
    ("tracts_metadata", "tl_geo_us_tract", "tl_geo_us_county"),
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Resolve the ids of the two output folders from their names
tracts_folder_id, county_folder_id = (
    dataiku.Folder(folder_name).get_id()
    for folder_name in ("tracts_data", "state_to_county")
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Handles on the output folders, opened through the previously resolved ids
tracts_data, state_to_county = [
    dataiku.Folder(folder_id)
    for folder_id in (tracts_folder_id, county_folder_id)
]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Generate state to county json
meta_tracts_df = tracts_metadata.get_dataframe()
# unique() keeps each county once per state, in order of first appearance, so
# the generated mapping is identical from run to run (the ordering produced by
# list(set(...)) is arbitrary and may change between interpreter runs).
state_county_mapping_df = meta_tracts_df.groupby("State_name")["County_name"].unique()
state_county_mapping_dict = {
    state: counties.tolist()
    for state, counties in state_county_mapping_df.items()
}

# Generate state_codes_lookup dataframe
# Select the four lookup columns first so that only this slice gets copied.
state_codes_lookup = meta_tracts_df[
    ["State_name", "County_name", "County_code", "State_code"]
].copy()
# Zero-pad the codes to fixed-width strings: 2 characters for states, 3 for counties.
state_codes_lookup["State_code"] = (
    state_codes_lookup["State_code"].astype(int).astype(str).str.zfill(2)
)
state_codes_lookup["County_code"] = (
    state_codes_lookup["County_code"].astype(int).astype(str).str.zfill(3)
)
# Concatenated state + county code; used below to keep one row per county.
state_codes_lookup["State_County_code"] = (
    state_codes_lookup["State_code"] + state_codes_lookup["County_code"]
)
state_codes_lookup = state_codes_lookup.drop_duplicates(
    subset=["State_County_code"]
).reset_index(drop=True)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Persist the state -> county options, then drop the in-memory copies
state_to_county.write_json("state_county_options.json", state_county_mapping_dict)
del state_county_mapping_dict, state_county_mapping_df

# Persist the state/county code lookup as UTF-8 encoded CSV
with state_to_county.get_writer("state_codes_lookup.csv") as writer:
    writer.write(state_codes_lookup.to_csv().encode("utf-8"))

# Release the remaining dataframes now that both files are written
del state_codes_lookup, meta_tracts_df

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Helper functions that build a simplified GeoJSON from an input dataset
def get_geojson_mapping(dataset, cols, prop_cols, mode):
    """Group the rows of ``dataset`` into GeoJSON features keyed by FIPS code.

    Features are grouped so that the topology can later be simplified one
    group at a time.

    Args:
        dataset: Object exposing ``iter_rows(columns=...)``; every row must
            provide ``"GEOID"``, ``"the_geom"`` (WKT text) and all ``prop_cols``.
        cols: Column names requested from ``dataset.iter_rows``.
        prop_cols: Column names copied (as strings) into each feature's
            ``properties``. Must include ``"STATEFP"``, and ``"COUNTYFP"``
            unless ``mode`` is ``"county"``.
        mode: ``"county"`` groups features by ``STATEFP``; any other value
            groups them by ``STATEFP + COUNTYFP``.

    Returns:
        A ``defaultdict(list)`` mapping each group key to its list of
        GeoJSON feature dicts. Only the first row seen for a given GEOID
        is kept.
    """
    feature_type = "Feature"
    result = defaultdict(list)
    # GEOIDs already emitted. ``result`` is keyed by state / state+county code,
    # so it cannot be used to detect a repeated GEOID.
    seen_ids = set()
    for row in dataset.iter_rows(columns=cols):
        id_ = str(row["GEOID"])
        if id_ in seen_ids:
            continue
        seen_ids.add(id_)

        # The JSON round trip turns the mapping into plain JSON types
        # (e.g. tuples become lists).
        geometry = json.loads(json.dumps(mapping(wkt.loads(row["the_geom"]))))
        properties = {k: str(row[k]) for k in prop_cols}
        feature = {
            "geometry": geometry,
            "id": id_,
            "properties": properties,
            "type": feature_type,
        }

        if mode == "county":
            group_key = properties["STATEFP"]
        else:
            group_key = properties["STATEFP"] + properties["COUNTYFP"]
        result[group_key].append(feature)
    return result

def simplify_task(features, epsilon=0.01):
    """Simplify the geometries of ``features`` to shrink the resulting files.

    The features are wrapped in a FeatureCollection, converted to a
    topology, simplified, and converted back to GeoJSON.

    Args:
        features: List of GeoJSON feature dicts.
        epsilon: Tolerance passed to ``toposimplify``. Defaults to ``0.01``,
            the value this function previously hard-coded.

    Returns:
        The simplified FeatureCollection as a dict parsed from the GeoJSON
        text produced by the topology.
    """
    feature_collection = {"type": "FeatureCollection", "features": features}
    simplified = tp.Topology(feature_collection).toposimplify(epsilon=epsilon)
    return json.loads(simplified.to_geojson())

def concurrent_simplify_exec(geojson_by_mapping):
    """Simplify every feature group and return the results as a list.

    Groups are processed one after another, in the dict's key order. Each
    entry is removed from ``geojson_by_mapping`` once it has been
    simplified, so the dict passed in is empty when this returns.
    """
    results = []
    # Snapshot the keys up front because the dict shrinks during the loop.
    for group_key in tuple(geojson_by_mapping):
        results.append(simplify_task(geojson_by_mapping[group_key]))
        # Drop the processed group to ease memory
        geojson_by_mapping.pop(group_key)
    return results

def format_simplified_geos(simplified_geos):
    """Merge simplified FeatureCollections into a single rewound GeoJSON.

    Each feature's ``id`` is reset to its ``GEOID`` property (topojson
    generates new ids per feature), then all features are gathered into one
    FeatureCollection which is rewound to avoid badly wound geometries.
    """
    merged_features = []
    for collection in simplified_geos:
        group = collection["features"]
        for feature in group:
            feature["id"] = feature["properties"]["GEOID"]
        merged_features.extend(group)

    return rewind(
        {"type": "FeatureCollection", "features": merged_features},
        rfc7946=False,
    )

def get_simplified_geojson(dataset,cols,prop_cols,mode):
    """Run the full pipeline: group features, simplify them, merge the result.

    Arguments are forwarded unchanged to ``get_geojson_mapping``. Progress
    messages are printed between the stages. Returns the GeoJSON produced by
    ``format_simplified_geos``.
    """
    def _trace(message):
        # Every progress message is preceded by the same banner line.
        print("DEBUUUG --------")
        print(message)

    _trace("Simplified computation started")
    grouped_features = get_geojson_mapping(dataset, cols, prop_cols, mode)
    _trace("geojson by states read")
    simplified = concurrent_simplify_exec(grouped_features)
    _trace("Simplified computation ended")
    del grouped_features
    return format_simplified_geos(simplified)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Generate tracts geojson
# Attributes copied into each feature's "properties"
prop_cols_tracts = [
    "STATEFP", "COUNTYFP", "TRACTCE", "GEOID", "NAME", "NAMELSAD",
    "MTFCC", "FUNCSTAT", "ALAND", "AWATER", "INTPTLAT", "INTPTLON",
]
# Columns read from the dataset: the geometry column followed by the properties
columns_geo_tracts = ["the_geom"] + prop_cols_tracts
tracts_geojson = get_simplified_geojson(
    tl_2020_01_tract, columns_geo_tracts, prop_cols_tracts, "tract"
)

# Write json file and free memory
tracts_data.write_json("tracts_data_complete.json", tracts_geojson)
del tracts_geojson

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Generate Counties geojson
# Attributes copied into each feature's "properties"
pros_cols_county = [
    "GEOID", "STATEFP", "COUNTYFP", "NAME", "NAMELSAD", "LSAD",
    "FUNCSTAT", "ALAND", "AWATER", "INTPTLAT", "INTPTLON", "MTFCC",
]
# Columns read from the dataset: the geometry column followed by the properties
cols_geo_counties = ["the_geom"] + pros_cols_county
counties_geojson = get_simplified_geojson(
    tl_2020_us_county, cols_geo_counties, pros_cols_county, "county"
)

# Write json file and free memory
# NOTE(review): counties.json is written to the tracts_data folder, not to
# state_to_county -- confirm this is the intended destination.
tracts_data.write_json("counties.json", counties_geojson)
del counties_geojson