#!/usr/bin/env bash
# Script to download and preprocess DSTC8 data to TSV format.
#
# Steps:
#   1. Clone the dstc8-schema-guided-dialogue repository (skipped when a
#      checkout already exists in the current directory).
#   2. For every dialogues_*.json file in the train, dev and test partitions,
#      extract the per-turn fields with jq into slots.json and run the parse
#      script on it.
#   3. Concatenate all per-file outputs into all/all.tsv.
#
# Requires git, jq and python on PATH.

# Abort on the first failing command, unset variable or failed pipeline stage
# so a broken step never feeds stale or partial data into the next one.
set -euo pipefail

# Specify absolute path to parse_dstc8_json.py script
PYTHON_PARSE_JSON_SCRIPT=~/parse_dstc8_json.py

REPO_DIR=dstc8-schema-guided-dialogue

# Clone only when no checkout is present, so the script can be re-run.
if [[ ! -d "$REPO_DIR" ]]; then
    git clone https://github.com/google-research-datasets/dstc8-schema-guided-dialogue.git
fi
cd "$REPO_DIR" || exit 1

# -p: do not fail when the output directory is left over from an earlier run.
mkdir -p all

for partition in train dev test
do
    cd "$partition" || exit 1
    for d in dialogues_*.json
    do
        # An unmatched glob stays as the literal pattern; skip it.
        [[ -e "$d" ]] || continue

        # Flatten the turns of every dialogue in the file into one array of
        # objects holding the utterance, the number of frames, the slots of
        # the first frame and the speaker.
        jq -r '[.[].turns[] | {utterance: .utterance, length: .frames | length, slots: .frames[0].slots, speaker: .speaker}]' "$d" > slots.json

        # The parse script is expected to leave its result in ./out, which is
        # then moved to a name that is unique per partition and input file.
        python "$PYTHON_PARSE_JSON_SCRIPT" slots.json
        mv out "../all/$partition.$d.out"
    done
    cd ../ || exit 1
done

cd all || exit 1
# ./ prefix keeps file names starting with '-' from being read as options.
cat ./*.out > all.tsv
rm ./*.out
