import os
import subprocess
from datasets import load_dataset


def main():
    """Generate the boxes dataset by running the generator script as a subprocess.

    The generator script path, the output directory and every generation
    option are hard-coded below. The child's output is not captured, so it
    is written straight to this process's stdout/stderr.

    Raises:
        RuntimeError: If the generator script exits with a non-zero status.
    """
    import sys  # local import: only needed to locate the running interpreter

    # script_file = "./datasets/generate_boxes_data.py"
    script_file = "./datasets/generate_boxes_data4.py"
    dataset_dir = "./datasets/boxes_dataset_move_only5"

    # Run the child with the interpreter executing this script instead of
    # whatever "python" happens to resolve to on PATH (which may be a
    # different environment, or missing entirely). sys.executable can be
    # empty in unusual embeddings, so keep the old name as a fallback.
    interpreter = sys.executable or "python"

    command = [
        interpreter, script_file,
        "--seed", "42",
        "--object_vocabulary_file", "./datasets/boxes_objects.csv",
        "--num_samples", "500000",
        "--output_dir", dataset_dir,
        "--num_boxes", "7",
        "--num_operations", "32",
        "--expected_num_items_per_box", "2",
        "--max_items_per_box", "3",
        "--all_contents_operation",
        # "--include_modifiers", "always",
    ]
    result = subprocess.run(command)

    if result.returncode != 0:
        # RuntimeError is still caught by ``except Exception``; the exit
        # status is included to make failures easier to diagnose.
        raise RuntimeError(
            "Failed to generate dataset (exit code {})".format(result.returncode)
        )

    print("Dataset generated successfully")


def test():
    """Load the generated dataset from disk and print a quick sanity check.

    Prints the loaded dataset object followed by the first record of its
    'train' split.
    """
    loaded = load_dataset(
        'json',
        data_dir='./datasets/boxes_dataset_move_only5',
    )
    print(loaded)

    first_train_record = loaded['train'][0]
    print(first_train_record)


# Script entry point: generate the dataset first, then load it back as a check.
if __name__ == "__main__":
    for stage in (main, test):
        stage()
