from datasets import load_dataset
from PIL import Image
from io import BytesIO
import requests
import os
import json
import uuid
import shutil



# Load the preference annotations, stored as one JSON object per line (JSONL).
with open('/home/user/llavafinetune/llava_preference_data.jsonl', 'r', encoding='utf-8') as f:
    # Skip blank lines (e.g. a trailing empty line), which json.loads rejects.
    data = [json.loads(line) for line in f if line.strip()]

# Flatten every record into one preference example per entry: each key other
# than 'image_id' maps to a dict holding 'Question', 'Rejected' and 'Preferred'.
new_dataset = []
for item in data:
    for key, entry in item.items():
        if key == 'image_id':
            continue
        new_dataset.append({
            'image_id': item['image_id'],
            'question': entry['Question'],
            'rejected': entry['Rejected'],
            'preferred': entry['Preferred'],
        })
            

# Load the "MMInstruction/VLFeedback" dataset through `datasets.load_dataset`.
VLFeedback = load_dataset("MMInstruction/VLFeedback")

# print(dataset['train'][0]['completions'])
# for item in dataset['train']:
def process_dataset(item):
    """Attach the best- and worst-rated responses to a single example.

    Every annotation is scored as the mean of its integer 'Helpfulness' and
    'Visual Faithfulness' ratings. The response at the position of the first
    highest score is stored under 'preferred'; the response at the position of
    the first lowest score is stored under 'rejected'. When all scores are
    equal, both fields therefore hold the same (first) response.

    Args:
        item: Mapping whose 'completions' entry provides parallel
            'annotations' and 'response' sequences.

    Returns:
        The same ``item``, updated in place with 'preferred' and 'rejected'.
    """
    completions = item['completions']
    mean_ratings = [
        (int(annotation['Helpfulness']['Rating'])
         + int(annotation['Visual Faithfulness']['Rating'])) / 2
        for annotation in completions['annotations']
    ]

    # max/min with a key return the first extreme position, matching
    # list.index(max(...)) / list.index(min(...)) tie-breaking.
    positions = range(len(mean_ratings))
    best = max(positions, key=mean_ratings.__getitem__)
    worst = min(positions, key=mean_ratings.__getitem__)

    responses = completions['response']
    item['preferred'] = responses[best]
    item['rejected'] = responses[worst]
    return item

# Run process_dataset over the dataset so examples gain the 'preferred' and
# 'rejected' fields it sets.
VLFeedback = VLFeedback.map(process_dataset)
   

def process_and_save(dataset, output_folder, subset_name=None):
    """Convert preference records to LLaVA-style JSON and write them to disk.

    Each record becomes a dict holding its ``id``, its ``img_path`` (stored
    under ``"image"``) and a two-turn conversation: the "human" turn carries
    the ``prompt`` and the "gpt" turn carries a dict with the ``rejected`` and
    ``preferred`` responses. All records are written as a single JSON list to
    ``VLfeedback_dataset.json``.

    Args:
        dataset: Iterable of mappings providing the keys ``id``, ``img_path``,
            ``prompt``, ``rejected`` and ``preferred``.
        output_folder: Directory that receives the JSON file; created if it
            does not exist.
        subset_name: Optional subset label (e.g. ``'val'``). When given, the
            file is written into ``output_folder/subset_name`` so that
            different subsets do not overwrite each other. When omitted, the
            file goes directly into ``output_folder``.

    Raises:
        KeyError: If a record lacks one of the required keys. Nothing is
            written in that case, because records are built before the file
            is opened.
    """
    # Structure for LLaVA JSON, one entry per record.
    json_data_list = [
        {
            "id": item['id'],
            "image": item['img_path'],
            "conversations": [
                {
                    "from": "human",
                    "value": item['prompt'],
                },
                {
                    "from": "gpt",
                    "value": {'rejected': item['rejected'], 'preferred': item['preferred']},
                },
            ],
        }
        for item in dataset
    ]

    # Keep each subset in its own subfolder so successive calls with the same
    # output_folder do not clobber one another's file.
    if subset_name is None:
        target_folder = output_folder
    else:
        target_folder = os.path.join(output_folder, subset_name)
    os.makedirs(target_folder, exist_ok=True)

    # Save the JSON data list to a file.
    json_output_path = os.path.join(target_folder, 'VLfeedback_dataset.json')
    with open(json_output_path, 'w', encoding='utf-8') as json_file:
        json.dump(json_data_list, json_file, indent=4)



# Export the VLFeedback 'train' split as LLaVA-style JSON.
process_and_save(VLFeedback['train'], '/home/user/llavafinetune/data')
# NOTE(review): this call passes a third positional argument ('val') and hands
# over records keyed by 'image_id'/'question', whereas process_and_save reads
# 'id', 'img_path' and 'prompt' from each record — confirm the intended
# signature and record schema before relying on this export.
process_and_save(new_dataset, '/home/user/llavafinetune/data', 'val')