# Absolute Grading: Outputs score of 1 to 5
import argparse
from prometheus_eval import PrometheusEval
from prometheus_eval.prompts import ABSOLUTE_PROMPT, SCORE_RUBRIC_TEMPLATE
import json
import os


def load_json(path):
    """Read the file at ``path`` and return its parsed JSON content.

    The file is decoded explicitly as UTF-8 so the result does not depend on
    the platform's default locale encoding.

    Args:
        path: Path to a JSON file.

    Returns:
        The deserialized value produced by ``json.load``.
    """
    with open(path, encoding="utf-8") as f:
        return json.load(f)
    
def load_txt_file(path):
    """Read a text file and return only its lines that contain a ``#``.

    The file is decoded explicitly as UTF-8 so the result does not depend on
    the platform's default locale encoding.

    Args:
        path: Path to the text file to read.

    Returns:
        A string made of every line that contains at least one ``#``
        character, in original order, joined with ``"\\n"``. Lines without a
        ``#`` are dropped; an input with no such lines yields ``""``.
    """
    with open(path, encoding="utf-8") as f:
        text = f.read()
    # NOTE(review): this keeps ONLY lines containing '#' and discards all
    # other text -- confirm that this is the intended filter.
    kept_lines = [line for line in text.split("\n") if "#" in line]
    return "\n".join(kept_lines)

def dict_to_string(d):
    """Render a mapping as text, one ``key: value`` pair per line.

    Pairs appear in the mapping's iteration order and are separated by a
    newline; an empty mapping produces an empty string.
    """
    rendered = []
    for name, val in d.items():
        rendered.append(f"{name}: {val}")
    return "\n".join(rendered)


def _collect_grading_inputs(dataset, method_names, result_dir, file_name):
    """Build the parallel instruction/response/topic/method lists to grade.

    One entry is produced for every (topic, method) pair. The response text
    is read from ``<result_dir>/<method>/<topic dir>/<file_name>``, where the
    topic directory name is the topic with spaces and slashes replaced by
    underscores.

    Returns:
        A tuple ``(instructions, responses, topics, methods)`` of equally
        long lists.
    """
    instructions = []
    responses = []
    topics = []
    methods = []
    for cur_topic in dataset:
        topic = cur_topic["topic"]
        intent = cur_topic["intent"]
        shared_instruction = f"Generate a comprehensive report that synthesizes information on the topic {topic}, incorporating diverse perspectives, novel insights, and in-depth coverage, while ensuring high relevance to the user’s initial intent: {intent}"
        article_dir_name = topic.replace(' ', '_').replace('/', '_')
        for method in method_names:
            instructions.append(shared_instruction)
            responses.append(load_txt_file(os.path.join(result_dir, method, article_dir_name, file_name)))
            topics.append(topic)
            methods.append(method)
    return instructions, responses, topics, methods


def main(args):
    """Grade every method's report for every topic against each rubric entry.

    For each entry of the rubric file, all (topic, method) responses are
    graded with the Prometheus judge and the feedback/ratings are written to
    ``<output_dir>/rubric_<index>_output.json``, grouped by topic and then by
    method.

    Args:
        args: Parsed command-line namespace providing ``rubric_path``,
            ``dataset_path``, ``result_dir``, ``file_name_to_evaluate`` and
            ``output_dir``.

    Raises:
        OSError: If the rubric, the dataset, the result directory or any
            report file cannot be read (e.g. ``FileNotFoundError``).
        KeyError: If a dataset entry lacks the ``"topic"`` or ``"intent"`` key.
    """
    rubric = load_json(args.rubric_path)
    dataset = load_json(args.dataset_path)
    result_dir = args.result_dir

    # Every sub-directory of the result directory is treated as one method.
    # os.listdir returns entries in arbitrary order, so sort for reproducible
    # grading order and output layout.
    method_names = sorted(
        d for d in os.listdir(result_dir) if os.path.isdir(os.path.join(result_dir, d))
    )

    # The prompts and responses do not depend on the rubric entry, so read
    # the report files once instead of once per rubric. Doing this before
    # the judge is constructed also surfaces missing files before the model
    # is loaded.
    instructions, responses, topics, methods = _collect_grading_inputs(
        dataset, method_names, result_dir, args.file_name_to_evaluate
    )

    judge = PrometheusEval(model_id="prometheus-eval/prometheus-7b-v2.0", absolute_grade_template=ABSOLUTE_PROMPT)
    # NOTE(review): this attribute is set after the judge has been built;
    # confirm that it still influences the underlying model at this point.
    judge.model.gpu_memory_utilization = 0.5

    os.makedirs(args.output_dir, exist_ok=True)

    for rubric_idx, rubric_to_grade in enumerate(rubric):
        # Hand the judge fresh list copies so the shared inputs stay intact
        # across rubric iterations regardless of what the judge does with them.
        feedbacks, ratings = judge.absolute_grade(
            instructions=list(instructions),
            responses=list(responses),
            rubric=dict_to_string(rubric_to_grade),
            reference_answers=None,
            params={}
        )

        # Group results as topic -> method -> {feedback, rating}.
        data = {"rubric": rubric_to_grade,
                "topic_grading": {}}
        for topic, method, feedback, rating in zip(topics, methods, feedbacks, ratings):
            data["topic_grading"].setdefault(topic, {})[method] = {"feedback": feedback, "rating": rating}

        output_path = os.path.join(args.output_dir, f"rubric_{rubric_idx}_output.json")
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)



if __name__ == "__main__":
    # Command-line entry point: parse the options and run the grading.
    parser = argparse.ArgumentParser(description="Process and evaluate articles")
    # The result directory and the file name are used to build every report
    # path, so they are mandatory; omitting them previously surfaced only as a
    # TypeError deep inside os.path.join.
    parser.add_argument('-d', '--result-dir', type=str, required=True,
                        help="Directory containing one sub-directory per method to evaluate")
    parser.add_argument('-f', '--file-name-to-evaluate', type=str, required=True, help="File name to evaluate")
    parser.add_argument('-r','--rubric-path', type=str, default="storm_rubric.json", help="Path to the rubric JSON file")
    parser.add_argument('-o', '--output-dir', type=str, default="storm_rubric_output", help="Directory for output")
    parser.add_argument('--dataset-path', type=str, default=os.path.join("..", "dataset", "final_core_dataset_meta.json"), help="Path to the dataset JSON file")
    args = parser.parse_args()
    main(args)

# example usage
# python prometeus_eval.py --result-dir 2k_report_only --file-name-to-evaluate article_to_evaluate_cap_2k.txt --rubric-path storm_rubric.json --output-dir ./storm_rubric_output