#!/usr/bin/env bash

# summDIR="/home/tiger/summ/MSPM"
# unsupervisedDIR=$2

# Positional arguments, in call order: DIR lmDIR DATAVER split
DIR="${1}"       # base directory; holds the en/ and fr/ inputs, and the output directory is created inside it
lmDIR="${2}"     # directory with the {es,de,zh}.doc.spm.noise / .sample files
DATAVER="${3}"   # name of the output sub-directory created under DIR
split="${4}"     # accepted as the fourth argument; not referenced in the visible part of this script

#######################################
# Emit an endless, reproducible byte stream derived from a seed, for use as
# `shuf --random-source`. The same seed always yields the same bytes.
# Arguments: $1 - seed string, passed to openssl as the passphrase
# Outputs:   unbounded stream on stdout (AES-256-CTR encryption of
#            /dev/zero); the consumer is expected to stop reading.
#######################################
get_seeded_random() {
  # local: keep the seed from leaking into (or clobbering) a global `seed`.
  local seed="$1"
  # -nosalt keeps the derived key a function of the passphrase only, so the
  # stream is repeatable. openssl diagnostics on stderr are discarded.
  openssl enc -aes-256-ctr -pass "pass:${seed}" -nosalt \
    </dev/zero 2>/dev/null
}

# Build the concatenated and shuffled train/dev files under $DIR/$DATAVER.

# Refuse to run with missing arguments: an empty DIR or DATAVER would make
# OUTDIR resolve to an unintended location (e.g. "/").
: "${DIR:?usage: $0 DIR lmDIR DATAVER [split]}"
: "${lmDIR:?usage: $0 DIR lmDIR DATAVER [split]}"
: "${DATAVER:?usage: $0 DIR lmDIR DATAVER [split]}"

OUTDIR="$DIR/$DATAVER"
# -p: re-running for an already existing DATAVER must not fail.
mkdir -p -- "$OUTDIR" || exit 1

# Print the number of lines in file $1; an unterminated last line is counted.
count_lines() {
  awk 'END { print NR }' < "$1"
}

# shuffle_pair PREFIX
# Shuffle $OUTDIR/PREFIX.noshuffle.spm.{doc,sum} into $OUTDIR/PREFIX.spm.{doc,sum}.
# Both files are shuffled with the same seeded random source so that the
# doc/sum lines stay paired; that only holds when both files have the same
# number of lines, so this is verified first.
# Returns non-zero on a line-count mismatch or if shuf fails.
shuffle_pair() {
  local prefix="$1"
  local side n_doc n_sum
  n_doc=$(count_lines "$OUTDIR/$prefix.noshuffle.spm.doc") || return 1
  n_sum=$(count_lines "$OUTDIR/$prefix.noshuffle.spm.sum") || return 1
  if [[ "$n_doc" != "$n_sum" ]]; then
    printf 'error: %s doc/sum line counts differ (%s vs %s)\n' \
      "$prefix" "$n_doc" "$n_sum" >&2
    return 1
  fi
  for side in doc sum; do
    shuf --random-source=<(get_seeded_random 66) \
      "$OUTDIR/$prefix.noshuffle.spm.$side" > "$OUTDIR/$prefix.spm.$side" \
      || return 1
  done
}

echo "[training set] Concat unsupervised data with summarization data ..."
cat -- \
  "$DIR/en/train.en.spm.doc" "$DIR/fr/train.fr.spm.doc" \
  "$lmDIR/es.doc.spm.noise" "$lmDIR/de.doc.spm.noise" "$lmDIR/zh.doc.spm.noise" \
  > "$OUTDIR/train.noshuffle.spm.doc" || exit 1
cat -- \
  "$DIR/en/train.en.spm.sum" "$DIR/fr/train.fr.spm.sum" \
  "$lmDIR/es.doc.spm.sample" "$lmDIR/de.doc.spm.sample" "$lmDIR/zh.doc.spm.sample" \
  > "$OUTDIR/train.noshuffle.spm.sum" || exit 1

echo "shuffling"
shuffle_pair train || exit 1

# The dev set uses only the en/fr summarization data (no unsupervised data).
echo "[dev set] Concat summarization data ..."
cat -- "$DIR/en/dev.en.spm.doc" "$DIR/fr/dev.fr.spm.doc" \
  > "$OUTDIR/dev.noshuffle.spm.doc" || exit 1
cat -- "$DIR/en/dev.en.spm.sum" "$DIR/fr/dev.fr.spm.sum" \
  > "$OUTDIR/dev.noshuffle.spm.sum" || exit 1

echo "shuffling"
shuffle_pair dev || exit 1

