import json, re


def _load_json(path):
    """Read the JSON file at ``path`` and return the decoded object."""
    # JSON text is UTF-8; state it explicitly instead of relying on the
    # platform's default text encoding.
    with open(path, encoding='utf-8') as f:
        return json.load(f)


# The three database files whose entries are scanned for addresses below.
attraction_db = _load_json('multiwoz2.1/origin/attraction_db.json')
hotel_db = _load_json('multiwoz2.1/origin/hotel_db.json')
restaurant_db = _load_json('multiwoz2.1/origin/restaurant_db.json')

# Example of an address value with comma-separated parts: 'Parkside, Cambridge'

from ontology import Ontology

# Unique address strings: whole address values and their comma-separated parts.
address = set()
onto = Ontology()

# Compiled once: matches text that begins with digits followed by a space.
_LEADING_NUMBER = re.compile(r"^\d+ ")


def _is_address(text):
    """Return True if ``text`` should be recorded as an address string.

    A string is kept when it starts with a number followed by a space, or
    when ``onto.find_name`` returns a falsy normalised value for it.
    """
    # find_name is always called, exactly as before; only the second element
    # of its three-element result is used.
    _, norm_v, _ = onto.find_name(text)
    return bool(_LEADING_NUMBER.search(text)) or not norm_v


for item in attraction_db + hotel_db + restaurant_db:
    addr = item['address']
    if _is_address(addr):
        address.add(addr)

    # Each comma-separated part is also checked on its own.
    for part in addr.split(','):
        part = part.strip()
        if _is_address(part):
            address.add(part)
    
import pprint

# Show the collected addresses on stdout for manual inspection.
pprint.pprint(address)

# Longest strings first. Ties are broken alphabetically so the written file
# is identical from run to run; iteration order of a set of strings is not
# stable across interpreter runs when hash randomisation is active.
address = sorted(address, key=lambda text: (-len(text), text))
with open('all_address.json', 'w') as f:
    json.dump(address, f, indent=2)






