# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu

# Read recipe inputs: load the source dataset into a pandas DataFrame
source_dataset = dataiku.Dataset("cdc_disease_percentage_county")
df = source_dataset.get_dataframe()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Keep only the columns whose name begins with 'Percent'
disease = [name for name in df.columns if name.startswith('Percent')]
df_disease = df.loc[:, disease]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Rank the numerical percentage disease values and express them as percentile
# (pct=True returns the ranks in percentile form instead of raw rank positions).
df_qroup = df_disease.rank(pct=True)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Rename the columns: strip only the leading 'Percent' prefix. Plain string
# slicing is used instead of `.str.replace` so that (a) a later occurrence of
# "Percent" inside a name (e.g. "...Percentile") is left intact and (b) the
# result does not depend on the pandas-version-specific default of `regex`.
_prefix = 'Percent'
df_qroup.columns = [
    name[len(_prefix):] if name.startswith(_prefix) else name
    for name in df_disease.columns
]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Place the percentile columns to the right of the original columns
final = pd.concat([df, df_qroup], axis="columns")

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Write recipe outputs
output_dataset = dataiku.Dataset("cdc_disease_percentage_percentile_county")
output_dataset.write_with_schema(final)