# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
import dataiku
from dataiku import pandasutils as pdu
import pandas as pd
import requests
import json

# custom functions that can be found within Libraries tab > G+L
from census_api_functions import get_project_variables, get_query_text, state_name_list, get_tracts_code_table

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Census API query parameters: the data-profile table to request and the
# ACS 5-year vintage (the year segment of the request URLs below).
census_code = 'DP04'
year = 2022
# API key read from the project variables through the project-library helper.
census_api_key = get_project_variables('standard','api_key')

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Endpoint that returns the NAME field for every state (`for=state:*`).
url_path = f"https://api.census.gov/data/{year}/acs/acs5/profile?get=NAME&for=state:*&key={census_api_key}"

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# State identifiers produced by the project-library helper from the endpoint
# above. They are later interpolated into `in=state:{...}` query filters.
# NOTE(review): presumably two-digit state FIPS codes as strings - confirm
# against state_name_list in the project library.
state_nums_list = state_name_list(url_path)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Download the requested table group for every census tract, one API request
# per state, and stack the per-state results into `all_tracts_df`.
#
# Per-state frames are collected in a list and concatenated once after the
# loop. Calling pd.concat inside the loop re-copies every previously fetched
# row on each iteration, and concatenating onto an empty seed DataFrame is
# deprecated in recent pandas releases.
state_frames = []

for state in state_nums_list:
    print("state: " + state)
    try:
        state_all_tracts_query_url = f"https://api.census.gov/data/{year}/acs/acs5/profile?get=NAME,group({census_code})&for=tract:*&in=state:{state}&key={census_api_key}"
        response_rows = json.loads(get_query_text(state_all_tracts_query_url))
        # The first row of the response holds the column names; the remaining
        # rows are the data.
        state_df = pd.DataFrame(response_rows[1:], columns=response_rows[0])
        # Drop the first column of the response.
        # NOTE(review): presumably the NAME field requested first in `get=` - confirm.
        state_frames.append(state_df.iloc[:, 1:])
    except Exception as e:
        # Best effort: a failing state is reported and skipped so the
        # remaining states are still collected.
        # NOTE(review): if get_query_text raises an error whose message embeds
        # the request URL, the API key would be printed here - confirm.
        print(f"Error processing state {state}: {e}")
        continue

# pd.concat raises on an empty list, so fall back to an empty frame when no
# state could be fetched (same result the original loop produced in that case).
all_tracts_df = pd.concat(state_frames, ignore_index=True) if state_frames else pd.DataFrame()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# data preprocessing: keep only GEO_ID and the DP04 fields that are used, then
# give the fields that are written out as-is their output names. The fields
# left with their DP04_* codes are inputs to derived columns computed later.
all_tracts_df = all_tracts_df[
    [
        'GEO_ID',
        'DP04_0001E',
        'DP04_0012E',
        'DP04_0013E',
        'DP04_0014E',
        'DP04_0078E',
        'DP04_0079E',
        'DP04_0058E',
        'DP04_0014PE',
        'DP04_0002E',
        'DP04_0058PE',
    ]
].rename(
    columns={
        'DP04_0001E': 'E_HU',
        'DP04_0014E': 'E_MOBILE',
        'DP04_0058E': 'E_NOVEH',
        'DP04_0014PE': 'EP_MOBILE',
        'DP04_0058PE': 'EP_NOVEH',
    }
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Cast the columns that feed the derived-feature arithmetic to float.
# NOTE(review): E_MOBILE, E_NOVEH, EP_MOBILE and EP_NOVEH are not cast here
# and keep whatever dtype the API response produced - confirm that is intended.
float_columns = ['E_HU', 'DP04_0012E', 'DP04_0013E', 'DP04_0078E', 'DP04_0079E', 'DP04_0002E']
all_tracts_df[float_columns] = all_tracts_df[float_columns].astype(float)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Derived estimate columns: each is the sum of two DP04 count fields.
all_tracts_df['E_MUNIT'] = all_tracts_df['DP04_0012E'] + all_tracts_df['DP04_0013E']
all_tracts_df['E_CROWD'] = all_tracts_df['DP04_0078E'] + all_tracts_df['DP04_0079E']
# E_CROWD as a percentage of DP04_0002E.
# NOTE(review): rows where DP04_0002E is 0 yield NaN/inf here; no fallback was
# ever specified for this column, so that behaviour is left unchanged.
all_tracts_df['EP_CROWD'] = (all_tracts_df['E_CROWD'] / all_tracts_df["DP04_0002E"]) * 100
# The raw inputs are no longer needed once the derived columns exist.
all_tracts_df = all_tracts_df.drop(['DP04_0012E', 'DP04_0013E','DP04_0078E', 'DP04_0079E', "DP04_0002E"], axis = 1)

# E_MUNIT as a percentage of E_HU, with 0 where E_HU is 0.
# Float division in pandas never raises ZeroDivisionError (x/0 gives inf,
# 0/0 gives NaN), so the previous try/except fallback could not trigger.
# The zero-denominator rows are therefore overwritten explicitly, row by row.
all_tracts_df['EP_MUNIT'] = (all_tracts_df['E_MUNIT'] / all_tracts_df["E_HU"]) * 100
all_tracts_df.loc[all_tracts_df['E_HU'] == 0, 'EP_MUNIT'] = 0.0

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Write recipe outputs: store the assembled tract-level frame in the
# "DP04_svi_tracts" dataset, letting the dataset schema follow the DataFrame.

dp04_output_dataset = dataiku.Dataset("DP04_svi_tracts")
dp04_output_dataset.write_with_schema(all_tracts_df)