# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# -*- coding: utf-8 -*-
import dataiku
import tiktoken

# Reviews to classify, read from the "product_reviews" dataset as a DataFrame.
reviews_dataset = dataiku.Dataset("product_reviews")
df = reviews_dataset.get_dataframe()

# The LLM to query is identified by the "LLM_id" custom variable and is
# looked up on the default project.
custom_variables = dataiku.get_custom_variables()
LLM_ID = custom_variables["LLM_id"]
default_project = dataiku.api_client().get_default_project()
llm = default_project.get_llm(LLM_ID)

# Tokenizer used further down to turn the sentiment labels into token ids.
# NOTE(review): hard-coded to "gpt-4o" -- confirm it matches the model behind
# LLM_ID, otherwise the token ids may not line up.
enc = tiktoken.encoding_for_model("gpt-4o")

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Instruction sent as the first message of every completion.
prompt = """Decide whether a product review's sentiment is positive, neutral, or negative.
Answer with a single word which can ONLY be 'positive', 'neutral' or 'negative'"""

# Few-shot examples as (review text, expected label) pairs.
examples = [
    ("I love my new chess board!", "positive"),
    ("Not what I expected but I guess it'll do", "neutral"),
    (
        "I'm so disappointed. The product seemed much better on the website",
        "negative",
    ),
]

# Encode each distinct label once. `tokens` is a list of token-id lists, one
# per label; sorting makes the order deterministic across runs.
labels = sorted({label for _, label in examples})
tokens = [enc.encode(label) for label in labels]

# Every label must encode to exactly one token: the completion loop limits the
# output to a single token and biases the logits toward the first token id of
# each label. Raise explicitly (instead of `assert`) so the check still runs
# under `python -O` and reports which label is the problem.
for label, token_ids in zip(labels, tokens):
    if len(token_ids) != 1:
        raise ValueError(
            f"Label {label!r} encodes to {len(token_ids)} tokens; "
            "expected exactly 1"
        )

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Logit bias toward the first token id of each label. It is identical for
# every row, so it is built once instead of on each iteration.
label_logit_bias = {t[0]: 100 for t in tokens}

# Classify each row: one completion per review, written to "prediction".
for i in df.index:
    completion = llm.new_completion()
    completion.settings["temperature"] = 0
    # A single output token is enough because each label is one token.
    completion.settings["maxOutputTokens"] = 1
    completion.settings["logitBias"] = label_logit_bias

    # Conversation layout: instruction, then each example review followed by
    # its label as an assistant reply, then the review to classify.
    completion.with_message(prompt)
    for example in examples:
        completion.with_message(example[0])
        completion.with_message(example[1], role="assistant")
    result = completion.with_message(df.loc[i, "text"]).execute().text

    df.loc[i, "prediction"] = result

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Write the DataFrame to the "sentiment_llm_mesh" output dataset.
output_dataset = dataiku.Dataset("sentiment_llm_mesh")
output_dataset.write_with_schema(df)