# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu

# Read recipe inputs
# Open the "cdc_disease_health_outcomes" input dataset and load it into a
# pandas DataFrame (`df`) that the rest of the recipe works on.
cdc_disease_health_outcomes = dataiku.Dataset("cdc_disease_health_outcomes")
df = cdc_disease_health_outcomes.get_dataframe()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Collect the names of every column whose name starts with 'Percent'
percent_columns = [name for name in df.columns if name.startswith('Percent')]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Rank each percentage column and express the ranks as percentiles (pct=True)
percentile_ranks = df[percent_columns].rank(pct=True)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Rename the percentile columns by removing 'Percent' from each column name,
# so they are distinguishable from the source percentage columns
percentile_ranks.columns = percentile_ranks.columns.str.replace(r'Percent', '')

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Append the percentile columns to the right of the original columns
final = pd.concat([df, percentile_ranks], axis=1)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Standardize FIPS codes to 11-character, zero-padded strings.
# The column is cast to int first so each value is rendered as plain digits,
# then left-padded with zeros up to 11 characters. zfill pads by however many
# zeros are missing (not just one) and leaves values that are already 11 or
# more characters long untouched.
# NOTE(review): astype(int) raises if FIPS contains missing values -- confirm
# the input never has null FIPS.
final['FIPS'] = final['FIPS'].astype(int).astype(str).str.zfill(11)

# --------------------------------------------------------------------------------
# Write recipe outputs
# Persist `final` (original columns, the percentile columns and the
# standardized FIPS) to the "cdc_disease_percentage_percentile_tract" dataset.
# NOTE(review): write_with_schema presumably also sets the output dataset's
# schema from the DataFrame -- confirm against the Dataiku API docs.
cdc_disease_percentage_percentile_tract = dataiku.Dataset("cdc_disease_percentage_percentile_tract")
cdc_disease_percentage_percentile_tract.write_with_schema(final)
