import json
import os
from collections import Counter

# Input dataset files. Each is read line by line, one JSON document per line.
train_file = 'top/ripe_data/train.json'
val_file = 'top/ripe_data/valid.json'
test_file = 'top/ripe_data/test.json'

# Create the three output directories up front (no error if they already exist).
for _split_dir in (
    'top_split1/ripe_data',
    'top_split2/ripe_data',
    'top_split3/ripe_data',
):
    os.makedirs(_split_dir, exist_ok=True)


def _read_json_lines(path):
    """Return the JSON documents stored one-per-line in *path*, in file order.

    Whitespace-only lines (for example a trailing empty line at the end of
    the file) are skipped; passing them to ``json.loads`` would raise
    ``json.JSONDecodeError``.

    Raises:
        OSError: if *path* cannot be opened for reading.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    with open(path, 'r', encoding='utf-8') as handle:
        # Iterate the file object directly so the raw lines are streamed
        # instead of being materialised twice (readlines + parsed list).
        return [json.loads(line) for line in handle if line.strip()]


train_data = _read_json_lines(train_file)
valid_data = _read_json_lines(val_file)
test_data = _read_json_lines(test_file)

# Intent labels for each of the three splits. Only the label names are used
# below; the integers are never read by this script (presumably per-intent
# example counts kept for reference -- TODO confirm).
_intent_groups = (
    {
        'GET_EVENT': 8860,
        'UNSUPPORTED_NAVIGATION': 1511,
        'GET_ESTIMATED_ARRIVAL': 1495,
        'GET_EVENT_ATTENDEE': 4,
        'COMBINE': 61,
        'GET_EVENT_ORGANIZER': 4,
    },
    {
        'GET_INFO_TRAFFIC': 8773,
        'GET_DISTANCE': 1742,
        'UNSUPPORTED': 1046,
        'UNSUPPORTED_EVENT': 298,
        'GET_LOCATION': 56,
        'UNINTELLIGIBLE': 4,
    },
    {
        'GET_ESTIMATED_DURATION': 4158,
        'GET_DIRECTIONS': 1859,
        'GET_ESTIMATED_DEPARTURE': 738,
        'GET_INFO_ROAD_CONDITION': 376,
        'UPDATE_DIRECTIONS': 245,
        'GET_INFO_ROUTE': 45,
        'GET_EVENT_ATTENDEE_AMOUNT': 4,
    },
)

# split_intent[k] is the set of intent names belonging to split k + 1.
split_intent = [set(group) for group in _intent_groups]
# For each dataset partition (train, then valid, then test) write three
# filtered copies -- split1, split2, split3 -- keeping only the records whose
# 'intent' value belongs to the corresponding set in split_intent. Each kept
# record is re-serialised as one JSON document per line.
for _file_name, _records in (
    ('train.json', train_data),
    ('valid.json', valid_data),
    ('test.json', test_data),
):
    for _idx in range(3):
        _out_path = 'top_split{}/ripe_data/{}'.format(_idx + 1, _file_name)
        with open(_out_path, 'w', encoding='utf-8') as _out:
            for _record in _records:
                if _record['intent'] in split_intent[_idx]:
                    _out.write(json.dumps(_record) + '\n')
print('fin.')
