# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu

# Read recipe inputs: load the tract-level feature table into a DataFrame.
df = dataiku.Dataset("svi_tracts_features").get_dataframe()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Keep only the columns whose name starts with 'SPL_'
# (presumably the per-theme sums of series -- confirm against the input schema).
spl_columns = list(filter(lambda name: name.startswith('SPL_'), df.columns))
spl_values = df.loc[:, spl_columns]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Percentile-rank every SPL column independently; rank(pct=True) returns
# fractional ranks. NOTE(review): the values are not rounded here.
theme_percentiles = spl_values.rank(pct=True)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Name each ranked column after its source column, with 'SPL_' swapped for 'RPL_',
# then append the ranked columns to the right of the original table.
theme_percentiles.columns = spl_values.columns.str.replace(r'SPL_', 'RPL_')
final = pd.concat([df, theme_percentiles], axis=1)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Standardize FIPS, State and County codes to fixed-width, zero-padded strings.
# The integer cast drops any leading zeros (and presumably any float ".0"
# suffix -- confirm the input dtype), so each code is left-padded back to its
# expected width: 11 characters for FIPS, 2 for the state, 3 for the county.
# str.zfill pads to the exact width and leaves longer values untouched, whereas
# prepending a single '0' would corrupt values that are too long and
# under-pad values that are more than one digit short.
final['FIPS'] = final['FIPS'].astype(int).astype(str).str.zfill(11)
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
final['State_code'] = final['State_code'].astype(int).astype(str).str.zfill(2)
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
final['County_code'] = final['County_code'].astype(int).astype(str).str.zfill(3)

# Write recipe outputs
svi_tracts_window_themes = dataiku.Dataset("svi_tracts_window_themes")
svi_tracts_window_themes.write_with_schema(final)
