import json
from pathlib import Path

from tqdm import tqdm
from transformers import pipeline


# Classify the "answer_1" text of each record in the QA dataset with the
# ToxiGen HateBERT model and store the predicted labels as a JSON list.

test_file_path = "./data/complete_qa_final.json"
# Decode explicitly as UTF-8 so parsing does not depend on the platform's
# locale default encoding.
with open(test_file_path, "r", encoding="utf-8") as input_file:
    data = json.load(input_file)

# For testing, only use the first 1000 answers.
# NOTE(review): the slice assumes the JSON top level is a list of records,
# each carrying an "answer_1" key -- confirm against the data file.
data_subset = data[:1000]
texts = [el["answer_1"] for el in data_subset]

# device=0 places the model on the first CUDA device, so a GPU is required.
toxigen_hatebert = pipeline(
    "text-classification",
    model="tomh/toxigen_hatebert",
    tokenizer="bert-base-uncased",
    device=0,
)
# Forwarded on every call so over-long answers are truncated to 512 tokens
# rather than being passed to the model at full length.
tokenizer_kwargs = {"padding": True, "truncation": True, "max_length": 512}

# The pipeline is called with one string at a time; element 0 of what it
# returns carries the predicted "label" for that string.
predictions = [
    toxigen_hatebert(text, **tokenizer_kwargs)[0]["label"]
    for text in tqdm(texts)
]

output_path = Path("./results/toxigen_uncased_result.json")
# Create the results directory if needed; otherwise a missing directory would
# discard every prediction only after inference has already completed.
output_path.parent.mkdir(parents=True, exist_ok=True)
with open(output_path, "w", encoding="utf-8") as output_file:
    json.dump(predictions, output_file)