from transformers import BertTokenizer, BertModel
from bert_score import BERTScorer

# This sample computes BERTScore metrics using the bert_score package
def evaluate(input_df, recipe_params, interpreted_columns, **kwargs):
    """Score the outputs against the ground truth with BERTScore precision.

    Args:
        input_df: Not used by this metric.
        recipe_params: Not used by this metric.
        interpreted_columns: Object exposing ``ground_truth`` and ``output``
            attributes, each of which provides a ``to_list()`` method.
        **kwargs: Accepted and ignored.

    Returns:
        A 2-tuple ``(mean_precision, per_row_precision)``: the mean of the
        precision values as a float, and the precision values themselves
        as a list (the row-by-row detail).
    """
    # The ground truth serves as the reference, the output as the candidate.
    ground_truths = interpreted_columns.ground_truth.to_list()
    outputs = interpreted_columns.output.to_list()

    # Scorer configured with the 'roberta-large' model.
    bert_scorer = BERTScorer(model_type='roberta-large')

    # score() yields precision, recall and F1; only precision is reported.
    precision, _recall, _f1 = bert_scorer.score(outputs, ground_truths)

    overall_precision = precision.mean().item()
    per_row_precision = precision.tolist()
    return overall_precision, per_row_precision
