# Absolute Grading: Outputs score of 1 to 5
import json
import copy
from scipy.stats import ttest_rel

# Entry types that count as "questioning"; every other type counts as "answering".
_QUESTION_TYPES = ("Original Question", "Information Request")


def _split_novelty(data):
    """Replace the "Novelty" rubric, in place, with questioning/answering copies.

    Both new rubrics start out with identical content; they only diverge later
    because different entry types are counted for each. Does nothing when the
    input has no "Novelty" rubric.
    """
    if "Novelty" not in data:
        return
    novelty = data.pop("Novelty")
    # Only one deep copy is needed: the original object is reused for the
    # second rubric since nothing else references it after the pop.
    data["Novelty_questioning"] = copy.deepcopy(novelty)
    data["Novelty_answering"] = novelty


def _counts_for_rubric(rubric_short_name, entry):
    """Return True if a raw rating entry contributes to the given rubric."""
    if rubric_short_name == "Novelty_questioning":
        return entry["type"] in _QUESTION_TYPES
    if rubric_short_name == "Novelty_answering":
        return entry["type"] not in _QUESTION_TYPES
    # Any other rubric whose name contains "Novelty" matches neither split and
    # therefore collects no ratings; all non-Novelty rubrics count every entry.
    return "Novelty" not in rubric_short_name


def _aggregate_method(rubric_short_name, method_data):
    """Store topic-level and method-level average ratings on method_data.

    Each topic dict gets an "avg_rating" (None when no entry counted). The
    method dict gets "avg_rating" (mean of the topic averages rounded to two
    decimals, or "-" when no topic had ratings) and "method_ratings" (the list
    of non-None topic averages).
    """
    method_ratings = []
    for topic_data in method_data["method_raw_data"].values():
        topic_ratings = [
            entry["rating"]
            for entry in topic_data["topic_raw_data"]
            if _counts_for_rubric(rubric_short_name, entry)
        ]
        topic_avg = sum(topic_ratings) / len(topic_ratings) if topic_ratings else None
        topic_data["avg_rating"] = topic_avg
        if topic_avg is not None:
            method_ratings.append(topic_avg)

    method_data["avg_rating"] = (
        round(sum(method_ratings) / len(method_ratings), 2) if method_ratings else "-"
    )
    # NOTE: the per-topic averages are kept so methods can be compared
    # afterwards (a paired t-test of "new_method" against each other method
    # via scipy.stats.ttest_rel was sketched here previously and is disabled).
    method_data["method_ratings"] = method_ratings


def _print_report(data):
    """Print each rubric's methods (sorted by name) with their average rating."""
    for rubric_short_name, rubric_data in data.items():
        print(f"** {rubric_short_name} **")
        for method in sorted(rubric_data):
            print(f"{method}: {rubric_data[method]['avg_rating']}")
        print("-" * 50)


def main(path="grading_result.json"):
    """Load grading results, average the ratings, and print a summary.

    Args:
        path: JSON file mapping rubric -> method -> {"method_raw_data":
            {topic: {"topic_raw_data": [{"rating": ..., "type": ...}, ...]}}}.
            Defaults to "grading_result.json" in the current directory.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
        KeyError: if an expected key is missing from the structure.
    """
    # Read with an explicit encoding and release the file handle before any
    # processing happens.
    with open(path, encoding="utf-8") as f:
        data = json.load(f)

    _split_novelty(data)

    for rubric_short_name, rubric_data in data.items():
        for method_data in rubric_data.values():
            _aggregate_method(rubric_short_name, method_data)

    _print_report(data)
                
# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
