import sys
import os
import json
import tqdm


# Extend the import search path before the `visarg` imports below run.
# The entry is a relative path, so it is resolved against the current working
# directory: run the script from the directory that contains
# `visual_argument_experiments/`.
sys.path.append('./visual_argument_experiments/src/')

from visarg.models.llava import llava
from visarg.others.utils import parse_annotation

def _numbered_lines(items):
  """Render ``items`` as a 1-based numbered list.

  Every entry is formatted as ``"<n>. <item>"`` and terminated by a newline,
  so an empty ``items`` yields an empty string.
  """
  return ''.join(f"{number}. {item}\n" for number, item in enumerate(items, start=1))


def _build_prompt(visual_premises, commonsense_premises, conclusion):
  """Fill the reasoning-steps prompt template.

  ``visual_premises`` and ``commonsense_premises`` are already-formatted
  strings (see ``_numbered_lines``); ``conclusion`` is interpolated as-is.
  """
  return f"""
Your task is to construct the reasoning steps to reach a conclusion of the image based on the given image, as well as the provided visual and commonsense premises.

Image:
<image>

Visual Premises:
{visual_premises}
Commonsense Premises:
{commonsense_premises}
Conclusion:
{conclusion}

Reasoning Steps:
"""


def argument_construction():
  """Ask LLaVA for the reasoning steps of a single cartoon and print them.

  Loads ``./dataset/cartoon_storage.json`` and, for the entry whose key is
  the integer ``1``, parses its annotation, builds a prompt from the visual
  premises, commonsense premises and conclusion, sends the prompt together
  with the local image to ``llava`` and prints the result.

  Returns:
    None. Output is written to stdout.

  Raises:
    ValueError: if a key of the JSON object cannot be converted with ``int``.
  """
  # Read the whole dataset up front so the file handle is released before the
  # (potentially long-running) model call instead of staying open throughout.
  with open('./dataset/cartoon_storage.json', encoding='utf-8') as ctn:
    cartoon_data = json.load(ctn)

  for key, entry in cartoon_data.items():
    # Only entry 1 is processed. NOTE: an earlier, disabled variant of this
    # filter instead processed every entry except IDs 103, 144, 174, 233,
    # 716 and 747.
    if int(key) != 1:
      continue

    # The last two parsed fields are not needed to build the prompt.
    vps, cps, conclusion, _rs, _ctg = parse_annotation(entry['annotation'])

    # The image is looked up locally under the basename of its URL.
    image_name = entry['image_url'].split('/')[-1]
    image_path = os.path.join('./dataset/images', image_name)
    print('='*10)

    prompt = _build_prompt(_numbered_lines(vps), _numbered_lines(cps), conclusion)

    # NOTE(review): answer_offset presumably tells llava where the generated
    # answer starts in the decoded text — confirm against visarg.models.llava.
    result = llava(image_path, prompt, answer_offset="Reasoning Steps:")
    print(result)


# Script entry point: run the demo only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
  argument_construction()