# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
import dataiku
import pandas as pd
from scipy.stats import kendalltau

# Load the two input datasets as DataFrames: the evaluated answers are kept in
# `df`, the annotated answers in `graded_df`.
evaluated_dataset = dataiku.Dataset("answers_evaluated")
df = evaluated_dataset.get_dataframe()
annotated_dataset = dataiku.Dataset("answers_annotated")
graded_df = annotated_dataset.get_dataframe()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
def _compose_answer_id(row):
    """Return the "<approach>-<question_id>" identifier for one row of `df`."""
    return f"{row['approach']}-{row['question_id']}"


# Index the evaluated answers by the composite identifier so that individual
# rows can be addressed by their "answer_id" label.
answer_ids = df.apply(_compose_answer_id, axis=1)
df = df.assign(answer_id=answer_ids).set_index("answer_id")

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Copy every annotation onto the row of `df` whose index label equals its
# "answer_id", storing it as an integer in the "grade" column. Only the first
# character of the label is parsed, so labels presumably start with a single
# digit -- TODO confirm against the annotation scheme.
for answer_id, label in zip(graded_df["answer_id"], graded_df["label"]):
    df.at[answer_id, "grade"] = int(label[0])

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Rank-correlate each automated evaluation score with the "grade" column using
# Kendall's tau. `results` collects one entry per evaluation method and is
# turned into a three-column DataFrame (`results_df`) afterwards.
results = {
    "evaluation_method": [],
    "kendall_tau_statistic": [],
    "kendall_tau_pvalue": [],
}
for evaluation_method in ["BERT score/score", "answer_correctness/v1/score"]:
    # kendalltau's default nan_policy ("propagate") returns NaN for both the
    # statistic and the p-value as soon as a single value is missing (for
    # instance an answer without a grade). Restrict the computation to the
    # rows where both the score and the grade are present; when nothing is
    # missing this selects every row and the result is unchanged.
    paired = df[[evaluation_method, "grade"]].dropna()
    result = kendalltau(paired[evaluation_method], paired["grade"])
    results["kendall_tau_statistic"].append(result.statistic)
    results["kendall_tau_pvalue"].append(result.pvalue)
    results["evaluation_method"].append(evaluation_method)

results_df = pd.DataFrame.from_dict(results)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Write the correlation table held in `results_df` to the output dataset.
output_dataset = dataiku.Dataset("automated_evaluation_methods")
output_dataset.write_with_schema(results_df)
