# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
import dataiku
from dataiku import pandasutils as pdu
import pandas as pd
import ast
import requests
import json

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# ACS vintage used in every Census API URL built in this script.
year = 2022

# Resolve the current Dataiku project and read its variables; the Census API
# key is stored under the project's "standard" variables as "api_key".
project_key = dataiku.get_custom_variables()["projectKey"]
client = dataiku.api_client()
project = client.get_project(project_key)
variables = project.get_variables()
census_api_key = variables["standard"]["api_key"]

# Query that returns one row per state (NAME plus the state code).
url_path = (
    f"https://api.census.gov/data/{year}/acs/acs5/profile"
    f"?get=NAME&for=state:*&key={census_api_key}"
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
def get_query_text(query_url, timeout=60):
    """Issue an HTTP GET for ``query_url`` and return the response body as text.

    Args:
        query_url: Fully-formed URL to request.
        timeout: Seconds ``requests`` waits on the server (connect / read)
            before giving up. Without a timeout ``requests.get`` can block
            indefinitely on a stalled connection, which would hang the whole
            recipe.

    Returns:
        The response body decoded to ``str`` (``response.text``). The HTTP
        status code is not inspected, so error bodies are returned as-is.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
        requests.exceptions.RequestException: On other transport failures.
    """
    response = requests.get(query_url, timeout=timeout)
    return response.text

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Download the state listing and parse the JSON payload.
all_state_names_query_url = url_path
all_state_names_query_text = get_query_text(all_state_names_query_url)
all_state_names_query_result_list = json.loads(all_state_names_query_text)

# The first element of the payload is used as the header row; the remaining
# elements are the data rows.
state_header = all_state_names_query_result_list[0]
state_rows = all_state_names_query_result_list[1:]
all_state_names_df = pd.DataFrame(state_rows, columns=state_header)

# Distinct state codes, in order of first appearance, used to drive the
# per-state queries below.
state_nums_list = all_state_names_df["state"].unique().tolist()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Pull ACS subject table S1701, one request per state.
# NOTE: despite the "tracts" naming, the query uses `for=county:*`, so each
# returned row is a county.
#
# Per-state frames are collected in a list and concatenated once after the
# loop; calling pd.concat on a growing frame inside the loop re-copies all
# previously fetched rows on every iteration.
s1701_state_frames = []

for state in state_nums_list:
    print("state: " + state)
    try:
        state_all_tracts_query_url = f"https://api.census.gov/data/{year}/acs/acs5/subject?get=NAME,group(S1701)&for=county:*&in=state:{state}&key={census_api_key}"
        state_all_tract_names_query_text = get_query_text(state_all_tracts_query_url)
        state_all_tract_names_query_result_list = json.loads(state_all_tract_names_query_text)
        # First element of the payload is the header row, the rest are data rows.
        state_all_tract_names_df = pd.DataFrame(state_all_tract_names_query_result_list[1:], columns=state_all_tract_names_query_result_list[0])
        # Drop the first response column (presumably the leading NAME field
        # requested in `get=` - confirm against the API response).
        df = state_all_tract_names_df.iloc[:, 1:]
        s1701_state_frames.append(df)
    except Exception as e:
        # Best-effort: a state whose request or parsing fails is reported and
        # skipped so the remaining states are still collected.
        print(f"Error processing state {state}: {e}")
        continue

# pd.concat raises on an empty list, so fall back to an empty frame when no
# state could be fetched (same result the incremental version produced).
all_tracts_df = (
    pd.concat(s1701_state_frames, ignore_index=True)
    if s1701_state_frames
    else pd.DataFrame()
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Keep the identifiers plus the two S1701 estimates needed below. `.copy()`
# makes the selection an independent frame so the column assignments that
# follow do not operate on a slice of the original (SettingWithCopyWarning).
all_tracts_df = all_tracts_df[['GEO_ID','state', 'county', 'S1701_C01_040E', 'S1701_C01_001E']].copy()
# The API payload is parsed from JSON text; cast the estimates to float before arithmetic.
all_tracts_df[['S1701_C01_040E', 'S1701_C01_001E']] = all_tracts_df[['S1701_C01_040E', 'S1701_C01_001E']].astype(float)
all_tracts_df = all_tracts_df.rename(columns={'S1701_C01_040E':'E_POV150'})

# EP_POV150 = E_POV150 expressed as a percentage of S1701_C01_001E.
# pandas float division never raises ZeroDivisionError (x/0 -> inf, 0/0 -> NaN),
# so a try/except cannot catch a zero denominator. Rows whose denominator is
# exactly 0 are set to 0 explicitly; NaN from missing inputs is left as NaN.
pov_denominator = all_tracts_df['S1701_C01_001E']
all_tracts_df['EP_POV150'] = (
    (all_tracts_df['E_POV150'] / pov_denominator) * 100
).mask(pov_denominator == 0, 0)

# The denominator was only needed to derive the percentage.
all_tracts_df = all_tracts_df.drop(['S1701_C01_001E'], axis = 1)


# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Pull ACS subject table S0601, one request per state (county-level rows, per
# the `for=county:*` clause of the query).
#
# Per-state frames are collected in a list and concatenated once after the
# loop; calling pd.concat on a growing frame inside the loop re-copies all
# previously fetched rows on every iteration.
s0601_state_frames = []

for state in state_nums_list:
    print("state: " + state)
    try:
        state_all_tracts_query_url = f"https://api.census.gov/data/{year}/acs/acs5/subject?get=NAME,group(S0601)&for=county:*&in=state:{state}&key={census_api_key}"
        state_all_tract_names_query_text = get_query_text(state_all_tracts_query_url)
        state_all_tract_names_query_result_list = json.loads(state_all_tract_names_query_text)
        # First element of the payload is the header row, the rest are data rows.
        state_all_tract_names_df = pd.DataFrame(state_all_tract_names_query_result_list[1:], columns=state_all_tract_names_query_result_list[0])
        # Drop the first response column (presumably the leading NAME field
        # requested in `get=` - confirm against the API response).
        df = state_all_tract_names_df.iloc[:, 1:]
        s0601_state_frames.append(df)
    except Exception as e:
        # Best-effort: a state whose request or parsing fails is reported and
        # skipped so the remaining states are still collected.
        print(f"Error processing state {state}: {e}")
        continue

# pd.concat raises on an empty list, so fall back to an empty frame when no
# state could be fetched (same result the incremental version produced).
all_tracts_df_S0601 = (
    pd.concat(s0601_state_frames, ignore_index=True)
    if s0601_state_frames
    else pd.DataFrame()
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Keep the join key plus two S0601 estimates, renamed to their output column
# names. The mapping's key order also fixes the column order of the selection.
s0601_output_names = {'S0601_C01_001E':'E_TOTPOP', 'S0601_C01_033E':'EP_NOHSDP'}
all_tracts_df_S0601 = all_tracts_df_S0601[['GEO_ID', *s0601_output_names]].rename(
    columns=s0601_output_names
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Pull ACS subject table S2503, one request per state (county-level rows, per
# the `for=county:*` clause of the query).
#
# Per-state frames are collected in a list and concatenated once after the
# loop; calling pd.concat on a growing frame inside the loop re-copies all
# previously fetched rows on every iteration.
s2503_state_frames = []

for state in state_nums_list:
    print("state: " + state)
    try:
        state_all_tracts_query_url = f"https://api.census.gov/data/{year}/acs/acs5/subject?get=NAME,group(S2503)&for=county:*&in=state:{state}&key={census_api_key}"
        state_all_tract_names_query_text = get_query_text(state_all_tracts_query_url)
        state_all_tract_names_query_result_list = json.loads(state_all_tract_names_query_text)
        # First element of the payload is the header row, the rest are data rows.
        state_all_tract_names_df = pd.DataFrame(state_all_tract_names_query_result_list[1:],columns=state_all_tract_names_query_result_list[0])
        # Drop the first response column (presumably the leading NAME field
        # requested in `get=` - confirm against the API response).
        df = state_all_tract_names_df.iloc[: , 1:]
        s2503_state_frames.append(df)
    except Exception as e:
        # Best-effort: a state whose request or parsing fails is reported and
        # skipped so the remaining states are still collected.
        print(f"Error processing state {state}: {e}")
        continue

# pd.concat raises on an empty list, so fall back to an empty frame when no
# state could be fetched (same result the incremental version produced).
all_tracts_df_S2503 = (
    pd.concat(s2503_state_frames, ignore_index=True)
    if s2503_state_frames
    else pd.DataFrame()
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# S2503 estimates used to derive E_HBURD / EP_HBURD: four addends followed by
# the denominator (S2503_C01_001E).
s2503_estimate_columns = ['S2503_C01_028E', 'S2503_C01_032E', 'S2503_C01_036E', 'S2503_C01_040E', 'S2503_C01_001E']

# Keep the join key plus those estimates. `.copy()` makes the selection an
# independent frame so the column assignments that follow do not operate on a
# slice of the original (SettingWithCopyWarning).
all_tracts_df_S2503 = all_tracts_df_S2503[['GEO_ID'] + s2503_estimate_columns].copy()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# The API payload is parsed from JSON text; cast the estimates to float before arithmetic.
all_tracts_df_S2503[s2503_estimate_columns] = all_tracts_df_S2503[s2503_estimate_columns].astype(float)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# E_HBURD is the row-wise sum of the four addend columns.
all_tracts_df_S2503['E_HBURD'] = all_tracts_df_S2503['S2503_C01_028E'] + all_tracts_df_S2503['S2503_C01_032E'] + all_tracts_df_S2503['S2503_C01_036E'] + all_tracts_df_S2503['S2503_C01_040E']

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# EP_HBURD = E_HBURD expressed as a percentage of S2503_C01_001E.
# pandas float division never raises ZeroDivisionError (x/0 -> inf, 0/0 -> NaN),
# so a try/except cannot catch a zero denominator. Rows whose denominator is
# exactly 0 are set to 0 explicitly; NaN from missing inputs is left as NaN.
hburd_denominator = all_tracts_df_S2503['S2503_C01_001E']
all_tracts_df_S2503['EP_HBURD'] = (
    (all_tracts_df_S2503['E_HBURD'] / hburd_denominator) * 100
).mask(hburd_denominator == 0, 0)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# The raw estimates were only needed to derive the two HBURD columns.
all_tracts_df_S2503 = all_tracts_df_S2503.drop(s2503_estimate_columns, axis = 1)


# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Pull ACS subject table S0101, one request per state (county-level rows, per
# the `for=county:*` clause of the query).
#
# Per-state frames are collected in a list and concatenated once after the
# loop; calling pd.concat on a growing frame inside the loop re-copies all
# previously fetched rows on every iteration.
s0101_state_frames = []

for state in state_nums_list:
    print("state: " + state)
    try:
        state_all_tracts_query_url = f"https://api.census.gov/data/{year}/acs/acs5/subject?get=NAME,group(S0101)&for=county:*&in=state:{state}&key={census_api_key}"
        state_all_tract_names_query_text = get_query_text(state_all_tracts_query_url)
        state_all_tract_names_query_result_list = json.loads(state_all_tract_names_query_text)
        # First element of the payload is the header row, the rest are data rows.
        state_all_tract_names_df = pd.DataFrame(state_all_tract_names_query_result_list[1:],columns=state_all_tract_names_query_result_list[0])
        # Drop the first response column (presumably the leading NAME field
        # requested in `get=` - confirm against the API response).
        df = state_all_tract_names_df.iloc[: , 1:]
        s0101_state_frames.append(df)
    except Exception as e:
        # Best-effort: a state whose request or parsing fails is reported and
        # skipped so the remaining states are still collected.
        print(f"Error processing state {state}: {e}")
        continue

# pd.concat raises on an empty list, so fall back to an empty frame when no
# state could be fetched (same result the incremental version produced).
all_tracts_df_S0101 = (
    pd.concat(s0101_state_frames, ignore_index=True)
    if s0101_state_frames
    else pd.DataFrame()
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Keep the join key plus two S0101 estimates, renamed to their output column
# names. The mapping's key order also fixes the column order of the selection.
s0101_output_names = {'S0101_C01_030E':'E_AGE65','S0101_C02_030E':'EP_AGE65'}
all_tracts_df_S0101 = all_tracts_df_S0101[['GEO_ID', *s0101_output_names]].rename(
    columns=s0101_output_names
)


# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Pull ACS subject table S2701, one request per state (county-level rows, per
# the `for=county:*` clause of the query).
#
# Per-state frames are collected in a list and concatenated once after the
# loop; calling pd.concat on a growing frame inside the loop re-copies all
# previously fetched rows on every iteration.
s2701_state_frames = []

for state in state_nums_list:
    print("state: " + state)
    try:
        state_all_tracts_query_url = f"https://api.census.gov/data/{year}/acs/acs5/subject?get=NAME,group(S2701)&for=county:*&in=state:{state}&key={census_api_key}"
        state_all_tract_names_query_text = get_query_text(state_all_tracts_query_url)
        state_all_tract_names_query_result_list = json.loads(state_all_tract_names_query_text)
        # First element of the payload is the header row, the rest are data rows.
        state_all_tract_names_df = pd.DataFrame(state_all_tract_names_query_result_list[1:],columns=state_all_tract_names_query_result_list[0])
        # Drop the first response column (presumably the leading NAME field
        # requested in `get=` - confirm against the API response).
        df = state_all_tract_names_df.iloc[: , 1:]
        s2701_state_frames.append(df)
    except Exception as e:
        # Best-effort: a state whose request or parsing fails is reported and
        # skipped so the remaining states are still collected.
        print(f"Error processing state {state}: {e}")
        continue

# pd.concat raises on an empty list, so fall back to an empty frame when no
# state could be fetched (same result the incremental version produced).
all_tracts_df_S2701 = (
    pd.concat(s2701_state_frames, ignore_index=True)
    if s2701_state_frames
    else pd.DataFrame()
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Keep the join key plus two S2701 estimates, renamed to their output column
# names. The mapping's key order also fixes the column order of the selection.
s2701_output_names = {'S2701_C04_001E':'E_UNINSUR', 'S2701_C05_001E':'EP_UNINSUR'}
all_tracts_df_S2701 = all_tracts_df_S2701[['GEO_ID', *s2701_output_names]].rename(
    columns=s2701_output_names
)


# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Pull ACS detailed table B06009, one request per state (county-level rows,
# per the `for=county:*` clause). Unlike the S-tables above, this query goes to
# the `acs/acs5` endpoint rather than `acs/acs5/subject`.
#
# Per-state frames are collected in a list and concatenated once after the
# loop; calling pd.concat on a growing frame inside the loop re-copies all
# previously fetched rows on every iteration.
b06009_state_frames = []

for state in state_nums_list:
    print("state: " + state)
    try:
        state_all_tracts_query_url = f"https://api.census.gov/data/{year}/acs/acs5?get=NAME,group(B06009)&for=county:*&in=state:{state}&key={census_api_key}"
        state_all_tract_names_query_text = get_query_text(state_all_tracts_query_url)
        state_all_tract_names_query_result_list = json.loads(state_all_tract_names_query_text)
        # First element of the payload is the header row, the rest are data rows.
        state_all_tract_names_df = pd.DataFrame(state_all_tract_names_query_result_list[1:],columns=state_all_tract_names_query_result_list[0])
        # Drop the first response column (presumably the leading NAME field
        # requested in `get=` - confirm against the API response).
        df = state_all_tract_names_df.iloc[: , 1:]
        b06009_state_frames.append(df)
    except Exception as e:
        # Best-effort: a state whose request or parsing fails is reported and
        # skipped so the remaining states are still collected.
        print(f"Error processing state {state}: {e}")
        continue

# pd.concat raises on an empty list, so fall back to an empty frame when no
# state could be fetched (same result the incremental version produced).
all_tracts_df_B06009 = (
    pd.concat(b06009_state_frames, ignore_index=True)
    if b06009_state_frames
    else pd.DataFrame()
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Keep the join key plus the single B06009 estimate, renamed to its output
# column name.
b06009_output_names = {'B06009_002E':'E_NOHSDP'}
all_tracts_df_B06009 = all_tracts_df_B06009[['GEO_ID', *b06009_output_names]].rename(
    columns=b06009_output_names
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Re-index every per-table frame by GEO_ID so the join below aligns rows on it.
# Order matters: the first frame is the one the others are joined onto.
dfs = [
    table_frame.set_index('GEO_ID')
    for table_frame in (
        all_tracts_df,
        all_tracts_df_S0601,
        all_tracts_df_S2503,
        all_tracts_df_S0101,
        all_tracts_df_S2701,
        all_tracts_df_B06009,
    )
]

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Join all remaining frames onto the first one by index, using DataFrame.join's
# default settings.
final_data = dfs[0].join(dfs[1:])

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Write recipe outputs: send the joined table to the "S_svi_county" dataset.
# NOTE(review): final_data carries GEO_ID as its index (set via set_index before
# the join), not as a regular column - confirm that write_with_schema persists
# the index; otherwise GEO_ID will be missing from the output dataset.
S_svi_county = dataiku.Dataset("S_svi_county")
S_svi_county.write_with_schema(final_data)