# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
import dataiku
from dataiku import pandasutils as pdu
import pandas as pd
import requests
import json

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Recipe configuration: resolve the current Dataiku project, read the Census
# API key from its "standard" project variables, and build the state-list URL.
project_key = dataiku.get_custom_variables()["projectKey"]
client = dataiku.api_client()
project = client.get_project(project_key)
variables = project.get_variables()
year = 2022
census_code = 'S1701'
census_api_key = variables["standard"]["api_key"]
# The vintage comes from `year` rather than a literal so that the state-list
# query and the per-tract queries always target the same ACS release.
url_path = (
    f"https://api.census.gov/data/{year}/acs/acs5/profile"
    f"?get=NAME&for=state:*&key={census_api_key}"
)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
def get_query_text(query_url, timeout=60):
    """Fetch *query_url* with an HTTP GET and return the response body as text.

    Args:
        query_url: Full URL to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the recipe indefinitely.

    Returns:
        The response body as a string (``response.text``).

    Raises:
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status, so an error page is never handed back as if it were data.
        requests.exceptions.RequestException: For connection failures and
            timeouts.
    """
    response = requests.get(query_url, timeout=timeout)
    response.raise_for_status()
    return response.text

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Download the state listing and tabulate it. The first row of the JSON
# payload is used as the column names and the remaining rows as the records.
all_state_names_query_url = url_path
all_state_names_query_text = get_query_text(all_state_names_query_url)
all_state_names_query_result_list = json.loads(all_state_names_query_text)

_state_rows = all_state_names_query_result_list[1:]
_state_header = all_state_names_query_result_list[0]
all_state_names_df = pd.DataFrame(_state_rows, columns=_state_header)

# Distinct values of the "state" column, in order of first appearance.
state_nums_list = all_state_names_df["state"].unique().tolist()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# API request to gather the dataset from U.S. Census Bureau.
# One request is issued per state code in state_nums_list. A state whose
# request or parsing fails is reported and skipped (best effort), so the
# combined table can be missing states; the skipped codes are listed at the end.
state_frames = []   # per-state tables, combined once after the loop
failed_states = []  # state codes for which no data could be collected

for state in state_nums_list:
    print(f"Processing state: {state}")
    try:
        # Construct API URL
        state_all_tracts_query_url = (
            f"https://api.census.gov/data/{year}/acs/acs5/subject"
            f"?get=NAME,group({census_code})&for=tract:*&in=state:{state}&key={census_api_key}"
        )
        # A timeout keeps one unresponsive request from hanging the whole run.
        response = requests.get(state_all_tracts_query_url, timeout=60)
        response.raise_for_status()  # Raise exception for HTTP errors

        # Parse the JSON response
        state_all_tract_names_query_result_list = response.json()

        # Convert JSON to DataFrame: first row is the header, the rest are records
        state_all_tract_names_df = pd.DataFrame(
            state_all_tract_names_query_result_list[1:],
            columns=state_all_tract_names_query_result_list[0],
        )

        # Exclude NAME column (first column)
        df = state_all_tract_names_df.iloc[:, 1:]

    # NOTE(review): the JSON handler comes first because recent `requests`
    # releases raise a decode error that is also a RequestException, which
    # would otherwise be reported as an HTTP error - confirm against the
    # installed requests version.
    except json.JSONDecodeError as json_err:
        print(f"JSON decode error for state {state}: {json_err}")
        failed_states.append(state)
    except requests.exceptions.RequestException as req_err:
        print(f"HTTP error for state {state}: {req_err}")
        failed_states.append(state)
    except Exception as e:
        print(f"Unexpected error for state {state}: {e}")
        failed_states.append(state)
    else:
        state_frames.append(df)

# Concatenate once instead of growing a DataFrame inside the loop, which
# re-copies every previously collected row on each iteration.
if state_frames:
    all_tracts_df = pd.concat(state_frames, ignore_index=True)
else:
    all_tracts_df = pd.DataFrame()

if failed_states:
    print(f"No data collected for states: {failed_states}")

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Keep the tract identifier and the two S1701 estimates needed below.
# .copy() yields an independent frame so the later column assignment does not
# write into a view of the downloaded table.
all_tracts_df = all_tracts_df[['GEO_ID', 'S1701_C01_040E', 'S1701_C01_001E']].copy()
all_tracts_df = all_tracts_df.astype({'S1701_C01_040E': float, 'S1701_C01_001E': float})
all_tracts_df = all_tracts_df.rename(columns={'S1701_C01_040E': 'E_POV150'})

# EP_POV150 is E_POV150 expressed as a percentage of S1701_C01_001E.
# pandas float division never raises ZeroDivisionError (it produces inf/NaN),
# so a try/except cannot implement the zero-denominator fallback. Instead, rows
# whose denominator is exactly 0 are set to 0 explicitly; rows with a missing
# denominator stay NaN.
denominator = all_tracts_df['S1701_C01_001E']
all_tracts_df['EP_POV150'] = (
    (all_tracts_df['E_POV150'] / denominator) * 100
).where(denominator != 0, 0)

# The denominator was only needed to compute the percentage.
all_tracts_df = all_tracts_df.drop(columns=['S1701_C01_001E'])


# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Write recipe outputs: store the final table in the Dataiku dataset named
# below, passing the DataFrame to write_with_schema.
output_dataset_name = "S1701_svi_tracts"
S1701_svi_tracts = dataiku.Dataset(output_dataset_name)
S1701_svi_tracts.write_with_schema(all_tracts_df)