#!/usr/bin/env bash

# Fail fast: abort on the first failing command, on use of an unset variable,
# and on a failure in any stage of a pipeline. Without this a failed mkdir,
# head or cat would be ignored and later steps would run on missing or
# partial files.
set -euo pipefail

# Root directories and language-pair tag used by the rest of this script.
#   SUMMDIR  - read for the <split>.$LG.spm.doc / .sum files (presumably the
#              summarisation corpus)
#   TRANDIR  - read for the joint.shuffle.<split>.spm.src / .tgt files
#              (presumably the WMT14 translation corpus)
#   MERGEDIR - where the merged and shuffled files are written
#   LG       - sub-directory name and file-name infix shared by all three roots
SUMMDIR="/home/tiger/summ/MSPM"
TRANDIR="/home/tiger/wmt14/MSPM"
MERGEDIR="/home/tiger/MSPM"
LG="EnFr"

# Show the output directory, then make sure it exists before anything is
# written into it.
echo "$MERGEDIR/$LG"
mkdir -p "$MERGEDIR/$LG"

#######################################
# Emit an endless, reproducible byte stream derived from a seed, suitable as
# a `--random-source` for tools such as shuf.
# Arguments: $1 - seed string; used as the openssl passphrase
# Outputs:   unbounded stream of bytes on stdout (the AES-256-CTR encryption
#            of /dev/zero). The reader must stop consuming to end it.
# Returns:   exit status of openssl
#######################################
get_seeded_random()
{
  # `local` keeps the seed from overwriting a global variable named `seed`.
  local seed="$1"
  # The same passphrase with -nosalt yields the same key, hence the same
  # stream on every call. openssl's stderr is discarded.
  openssl enc -aes-256-ctr -pass "pass:$seed" -nosalt \
    </dev/zero 2>/dev/null
}

# For each split: take the first 1,000,000 lines of each side of the
# translation data, append them to the matching side of the $SUMMDIR data,
# then shuffle both merged files with the same seed so that line i of the
# .doc output still corresponds to line i of the .sum output.
for split in train
do
    tran_src="$TRANDIR/$LG/joint.shuffle.${split}.spm.src"
    tran_tgt="$TRANDIR/$LG/joint.shuffle.${split}.spm.tgt"
    tran_src_head="$TRANDIR/$LG/joint.shuffle.first100w.${split}.spm.src"
    tran_tgt_head="$TRANDIR/$LG/joint.shuffle.first100w.${split}.spm.tgt"
    merged_doc="$MERGEDIR/$LG/multi.${split}.$LG.spm.noshuffle.doc"
    merged_sum="$MERGEDIR/$LG/multi.${split}.$LG.spm.noshuffle.sum"

    # Truncate each side of the translation data to its first 1,000,000 lines.
    head -n 1000000 "$tran_src" > "$tran_src_head" || exit 1
    head -n 1000000 "$tran_tgt" > "$tran_tgt_head" || exit 1

    # Concatenate: $SUMMDIR data first, truncated translation data second.
    cat "$SUMMDIR/$LG/${split}.$LG.spm.doc" "$tran_src_head" > "$merged_doc" || exit 1
    cat "$SUMMDIR/$LG/${split}.$LG.spm.sum" "$tran_tgt_head" > "$merged_sum" || exit 1

    # Two shuf runs fed the same random bytes produce the same permutation
    # only when both inputs have the same number of lines; otherwise the
    # doc/sum pairing would be silently destroyed.
    doc_lines="$(wc -l < "$merged_doc")" || exit 1
    sum_lines="$(wc -l < "$merged_sum")" || exit 1
    if [[ "$doc_lines" -ne "$sum_lines" ]]; then
        printf 'error: %s has %s lines but %s has %s lines; refusing to shuffle\n' \
            "$merged_doc" "$doc_lines" "$merged_sum" "$sum_lines" >&2
        exit 1
    fi

    echo "shuffling"
    # Write under $LG (not a hard-coded language pair) so the outputs land in
    # the directory created for the configured language pair.
    shuf --random-source=<(get_seeded_random 66) "$merged_doc" \
        > "$MERGEDIR/$LG/multi.${split}.$LG.spm.doc" || exit 1
    shuf --random-source=<(get_seeded_random 66) "$merged_sum" \
        > "$MERGEDIR/$LG/multi.${split}.$LG.spm.sum" || exit 1
done

# Copy both sides (.doc and .sum) of the dev split unchanged from $SUMMDIR
# into the merged output directory. Paths are quoted so they survive spaces
# or glob characters in the configured directories.
for part in doc sum
do
    cp "$SUMMDIR/$LG/dev.$LG.spm.$part" "$MERGEDIR/$LG/dev.$LG.spm.$part"
done
