import sys
import json

# Characters removed from every token before it is written. '|' is in the set
# because the output uses it to separate a token from its label.
_STRIP_TABLE = str.maketrans('', '', ',;:.!?|')


def _label_tokens(utterance, slots):
    """Split *utterance* into (token, label) pairs using the slot spans.

    Each entry of *slots* is read through its 'start', 'exclusive_end' and
    'slot' keys; the first two are used as slice offsets into *utterance*.
    Text inside a span is labelled with the span's 'slot' value, text outside
    every span is labelled 'other'. Tokens are whitespace-separated and
    lowercased.
    """
    labelled = []
    cursor = 0
    for span in sorted(slots, key=lambda s: s['start']):
        start, end = span['start'], span['exclusive_end']
        # Slice the original text and lowercase each piece afterwards:
        # str.lower() can change the length of a string, so lowercasing the
        # whole utterance first would shift the offsets.
        labelled.extend(
            (word, 'other') for word in utterance[cursor:start].lower().split())
        labelled.extend(
            (word, span['slot']) for word in utterance[start:end].lower().split())
        cursor = end
    # Whatever follows the last span (an empty slice yields no tokens).
    labelled.extend(
        (word, 'other') for word in utterance[cursor:].lower().split())
    return labelled


def _format_line(utt):
    """Return the output line for one utterance record, newline included."""
    fields = []
    for word, label in _label_tokens(utt['utterance'], utt['slots']):
        word = word.translate(_STRIP_TABLE)
        # Tokens that consisted only of punctuation are dropped.
        if word:
            fields.append('{}|{}'.format(word, label))
    return 'dstc8\tdstc8\t' + ' '.join(fields) + '\n'


def main(argv=None):
    """Convert the JSON file named by argv[1] into the tagged file 'out'.

    *argv* defaults to ``sys.argv``. The JSON document is iterated as a
    sequence of records; only records with ``length == 1``, a non-empty
    'slots' value and a non-empty 'utterance' are written, one line each.
    The output file 'out' is created in the current working directory.

    Raises IndexError when no input path is given.
    """
    argv = sys.argv if argv is None else argv
    # Load everything first so a bad input does not truncate an existing 'out'.
    with open(argv[1], 'r', encoding='utf-8') as source:
        utts = json.load(source)

    with open('out', 'w', encoding='utf-8') as out:
        for utt in utts:
            if utt['length'] == 1 and utt['slots'] and utt['utterance']:
                out.write(_format_line(utt))


if __name__ == '__main__':
    main()
