#!/usr/bin/env fish

# This script is used to generate the final "xxx" set from xxx's annotations.
# They use a different format for the dev set, as they don't have a username field. I
# have no idea why that is the case. The train set is the same as the rest, so we can
# reshape the dev set to match the train set and then merge them together.

# Command to generate the final "xxx" set:
# ./shape_xxx.fish dev.json xxx_dev.json xxx_train.json > xxx.json
# "xxx" is then used alongside xinyu.json, 2f2b33b6.json, etc.

# Validate the command line before touching any file.
#   --help anywhere in the arguments: print the usage to stdout and exit 0.
#   any argument count other than 3:  print the usage to stderr and exit 1.
# The error case must not write to stdout: the script's stdout is normally
# redirected into the merged JSON file, which would hide the message from the
# user and leave it in the output file.
set -l usage_lines \
    "Usage: $(basename (status -f)) <dev_ann> <dev_out> <train>" \
    "Output to stdout"

if contains -- --help $argv
    printf '%s\n' $usage_lines
    exit 0
else if test (count $argv) -ne 3
    printf '%s\n' $usage_lines >&2
    exit 1
end

# Bind the three positional arguments to named variables.
# `set -l` is used on purpose: an unscoped `set` modifies a variable of the same
# name in whatever scope it already exists (including universal variables), so a
# user-level `train` or `dev_out` could otherwise be overwritten by this script.

# Path to the dev set file with xxx's annotations
set -l dev_ann $argv[1]
# Path to output file from transforming the dev set
set -l dev_out $argv[2]
# Path to the existing xxx train set annotation
set -l train $argv[3]

# Transform xxx's dev annotations to the same format as other annotations by
# wrapping the whole dev document: {username: "xxx", items: <dev document>}.
# The result is written to $dev_out.
# Stop if jq fails (unreadable file, invalid JSON, ...): the redirection has
# already truncated $dev_out, so continuing would merge an empty or partial file
# and still print output.
jq '{username: "xxx", items: .}' $dev_ann >$dev_out
or exit $status

# Merge xxx_dev.json and xxx_train.json into a single document on stdout.
# The first input ($dev_out) provides the top-level fields; its `items` is
# replaced by the dev items followed by the train items.
# `flatten(1)` removes only the one nesting level introduced by pairing the two
# item lists, so array values inside individual items are left intact.
# Information on the data source (i.e. which split is the item from) is lost,
# but will be restored with clean_data.py
jq -s '.[0] * {items: ([.[0].items, .[1].items] | flatten(1))}' $dev_out $train
or exit $status
