import argparse
import json
import os
from scipy.stats import ttest_rel

# Method every other method is compared against in the paired t-tests.
_REFERENCE_METHOD = "new_method"


def _collect_ratings(topic_grading):
    """Return ``{method: {topic: rating}}`` built from a ``topic_grading`` mapping.

    Ratings stay keyed by topic so that two methods can later be paired
    topic-by-topic instead of by list position.
    """
    ratings_by_method = {}
    for topic, evaluations in topic_grading.items():
        for method, details in evaluations.items():
            ratings_by_method.setdefault(method, {})[topic] = details["rating"]
    return ratings_by_method


def _print_paired_t_tests(ratings_by_method):
    """Print a paired t-test of the reference method against every other method."""
    reference = ratings_by_method.get(_REFERENCE_METHOD)
    if reference is None:
        # Nothing to compare against; report it instead of raising KeyError.
        print(f"\nSkipping paired t-tests: no ratings found for {_REFERENCE_METHOD}.")
        return

    for method, by_topic in sorted(ratings_by_method.items()):
        if method == _REFERENCE_METHOD:
            continue
        if len(by_topic) != len(reference):
            print(f"\nCannot perform t-test for {_REFERENCE_METHOD} vs {method} due to unequal sample sizes.")
            continue
        if by_topic.keys() != reference.keys():
            # Equal counts are not enough: a paired test needs the same topics.
            print(
                f"\nCannot perform t-test for {_REFERENCE_METHOD} vs {method} "
                "because the methods were rated on different topics."
            )
            continue

        # Walk the reference's topics so both samples are aligned by topic.
        topics = list(reference)
        t_stat, p_value = ttest_rel(
            [reference[topic] for topic in topics],
            [by_topic[topic] for topic in topics],
        )
        print(f"\nPaired t-test results for {_REFERENCE_METHOD} vs {method}:")
        print(f"t-statistic: {t_stat:.3f}")
        print(f"p-value: {p_value:.3f}")


def main(args):
    """Summarize every rubric-grading JSON file in a directory.

    For each ``*.json`` file in ``args.rubric_grading_output_dir`` this prints
    the rubric's criteria description, the average rating of each method, and
    a paired t-test of ``new_method`` against every other method, followed by
    a separator line.

    Each file is read as
    ``{"rubric": {"criteria_description": ...},
    "topic_grading": {topic: {method: {"rating": number}}}}``.

    Args:
        args: Object with a ``rubric_grading_output_dir`` attribute naming the
            directory to scan.

    Raises:
        KeyError: If a file lacks one of the keys listed above.
    """
    rubric_grading_output_dir = args.rubric_grading_output_dir
    # os.listdir returns entries in arbitrary order; sort for a stable report.
    for filename in sorted(os.listdir(rubric_grading_output_dir)):
        if not filename.endswith(".json"):
            continue

        file_path = os.path.join(rubric_grading_output_dir, filename)
        with open(file_path, encoding="utf-8") as f:
            data = json.load(f)

        rubric_description = data["rubric"]["criteria_description"]
        ratings_by_method = _collect_ratings(data["topic_grading"])

        print(rubric_description)
        for method, by_topic in sorted(ratings_by_method.items()):
            avg_rating = sum(by_topic.values()) / len(by_topic)
            print(f"{method}: {avg_rating:.2f}")

        _print_paired_t_tests(ratings_by_method)

        print("=" * 50)

if __name__ == "__main__":
    # Command-line entry point: parse the directory option and run the report.
    parser = argparse.ArgumentParser(description="Process rubric output files.")
    parser.add_argument(
        '-o', '--rubric-grading-output-dir',
        type=str,
        # Required: without it argparse leaves the value as None, and main()
        # would receive None instead of a directory path.
        required=True,
        help="Directory containing the rubric output JSON files.",
    )
    args = parser.parse_args()
    main(args)