import pandas as pd
import numpy as np
import json

# Load the data
data = pd.read_csv('../analysis/closed_source/t2t_gpt4/setting1_word_cooccurence.csv')  # Update the path as necessary

# Prepare the output structure
output_json = {}

# Iterate over each bias type
for bias_type in data['bias_type'].unique():
    output_json[bias_type] = []
    bias_data = data[data['bias_type'] == bias_type]

    # Iterate through each row in the filtered bias data
    for index, row in bias_data.iterrows():
        row_data = {}
        target1_data = {}
        target2_data = {}

        # Iterate over each letter in the alphabet for target1 and target2
        for i in range(1, 27):
            answer_prefix1 = f"answer1_{chr(96+i)}"
            score_prefix1 = f"score1_{chr(96+i)}"
            answer_prefix2 = f"answer2_{chr(96+i)}"
            score_prefix2 = f"score2_{chr(96+i)}"

            # Add data for each target-answer pair sorted by scores
            target1_data[f"{row['target1']} || {row[answer_prefix1]}"] = row[score_prefix1]
            target2_data[f"{row['target2']} || {row[answer_prefix2]}"] = row[score_prefix2]

        # Sort pairs by scores in descending order
        sorted_target1_data = dict(sorted(target1_data.items(), key=lambda item: item[1], reverse=True))
        sorted_target2_data = dict(sorted(target2_data.items(), key=lambda item: item[1], reverse=True))

        # Append to row_data and then to the overall JSON structure
        row_data['target1'] = sorted_target1_data
        row_data['target2'] = sorted_target2_data
        output_json[bias_type].append(row_data)

# Write the JSON output
output_file_path = '../analysis/closed_source/t2t_gpt4/sorted_scores.json'  # Save in the input directory
with open(output_file_path, 'w') as outfile:
    json.dump(output_json, outfile, indent=4)

print(f"JSON file saved as {output_file_path}")