import os
import json

def is_image(file: str) -> bool:
    """Return True if *file* has a JPEG or PNG extension.

    Only the name is inspected; the file itself is never opened. The
    comparison ignores case, so ``photo.JPG`` and ``photo.png`` both match.

    Args:
        file: A file name or path.

    Returns:
        True when the name ends in ``.jpg``, ``.jpeg`` or ``.png``
        (in any letter case), otherwise False.
    """
    # str.endswith accepts a tuple of suffixes, so one call covers them all.
    return file.lower().endswith(('.jpg', '.jpeg', '.png'))

# Directory that is scanned for images and that receives metadata.jsonl.
data_dir='../train_data'

# Write one JSON object per line: the image's file name plus the full contents
# of its companion text file, which is named "<image file name>.txt".
with open(f'{data_dir}/metadata.jsonl','w',encoding='utf-8') as f:
    for file in os.listdir(data_dir):
        if not is_image(file):
            continue
        text_path = f'{data_dir}/{file}.txt'
        # Images without a companion text file are left out of the metadata.
        if not os.path.exists(text_path):
            continue
        # Read inside a context manager so the handle is closed right away
        # instead of lingering until garbage collection.
        with open(text_path,'r',encoding='utf-8') as text_file:
            text = text_file.read()
        f.write(json.dumps({'file_name':file,'text':text})+'\n')
