import glob
import json, os, sys
from tqdm import tqdm


# Maps each top-level category (also used as a sub-directory name under
# OUTPUT_HOME) to the attribute names whose rules are inspected by this script.
SCHEMA = {
    'auto': ['model', 'price', 'engine', 'fuel_economy'],
    'book': ['title', 'author', 'isbn_13', 'publisher', 'publication_date'],
    'camera': ['model', 'price', 'manufacturer'],
    'job': ['title', 'company', 'location', 'date_posted'],
    'movie': ['title', 'director', 'genre', 'mpaa_rating'],
    'nbaplayer': ['name', 'team', 'height', 'weight'],
    'restaurant': ['name', 'address', 'phone', 'cuisine'],
    'university': ['name', 'phone', 'website', 'type'],
}

# Location of the raw source data. NOTE(review): not referenced anywhere in
# the visible part of this script — confirm whether it is still needed.
DATA_HOME = '/mnt/data122/harryhuang/swde/sourceCode'
# Directory that is scanned for per-category rule files and that receives the
# aggregated 'equal_.json' output.
OUTPUT_HOME = '../dataset/swde/mistral/seq'
# Only files whose path contains '_<PATTERN>.json' are processed.
PATTERN = 'seq'

# Scan every rule file under OUTPUT_HOME/<category>/ and collect all action
# sequences that contain the substring '.='.  The collected sequences are
# written to OUTPUT_HOME/equal_.json and their count is printed.
#
# Results are kept in two module-level names:
#   ac_list -- every matching action sequence, in discovery order
#   tot     -- number of matches (always equal to len(ac_list))
tot = 0
ac_list = []
for field in SCHEMA:
    field_dir = os.path.join(OUTPUT_HOME, field)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(field_dir, exist_ok=True)

    for website_path in glob.glob(os.path.join(field_dir, '*')):
        # Skip anything that is not a rule file for the selected PATTERN.
        if f'_{PATTERN}.json' not in website_path:
            continue

        # Read as UTF-8 explicitly so the result does not depend on the
        # locale's default encoding.
        with open(website_path, encoding='utf-8') as f:
            xpath_rule = json.load(f)

        # Every attribute listed in SCHEMA must be present in the rule file;
        # a missing key raises KeyError, exposing an incomplete rule file.
        for item in SCHEMA[field]:
            for action_sequence in xpath_rule[item]:
                if '.=' in action_sequence:
                    ac_list.append(action_sequence)
                    tot += 1

# ensure_ascii=False emits non-ASCII characters verbatim, so the file must be
# opened with an encoding that can represent them on every platform.
with open(os.path.join(OUTPUT_HOME, 'equal_.json'), 'w', encoding='utf-8') as f:
    json.dump(ac_list, f, ensure_ascii=False, indent=4)
print(tot)