#!/usr/bin/env bash

# summDIR="/home/tiger/summ/MSPM"
# unsupervisedDIR=$2

# Positional arguments:
#   $1 DIR      root of the summarization data; later steps read
#               $DIR/{en,fr}/{train,dev}.*.spm.{doc,sum} and write to
#               $DIR/$DATAVER
#   $2 lmDIR    directory with the {es,de,zh}.doc.spm.{noise,sample} files
#   $3 DATAVER  name of the output sub-directory created under $DIR
#   $4 split    stored for compatibility; nothing in this script reads it
if (( $# < 2 )); then
  printf 'Usage: %s DIR lmDIR DATAVER [split]\n' "${0##*/}" >&2
  exit 2
fi

DIR=$1
lmDIR=$2
DATAVER=$3
# shellcheck disable=SC2034  # kept so the positional interface is unchanged
split=$4

# Scratch directory for the intermediate concatenated/joined files.
# -p keeps a directory left behind by an interrupted run from being an error.
# NOTE(review): TMPDIR is also the conventional variable other tools consult
# for their temp location; if it is exported in the caller's environment this
# assignment changes it for child processes too -- confirm that is acceptable.
TMPDIR=~/tmp_concat2
mkdir -p -- "$TMPDIR" || {
  printf 'error: cannot create scratch directory %s\n' "$TMPDIR" >&2
  exit 1
}

#######################################
# Emit an endless, reproducible byte stream derived from a seed.
# Encrypts /dev/zero with AES-256-CTR using the seed as an unsalted
# passphrase, so the same seed yields the same bytes on every call.
# Used in this script as the --random-source for `shuf`.
# Arguments: $1 - seed (passed to openssl as the passphrase)
# Outputs:   pseudo-random bytes on stdout until the reader closes the pipe
#######################################
get_seeded_random()
{
  # local: do not overwrite a caller's variable that happens to be named seed.
  local seed="$1"
  # stderr is discarded as before; note this also hides genuine openssl
  # errors, in which case the stream is simply empty.
  openssl enc -aes-256-ctr -pass pass:"$seed" -nosalt \
    </dev/zero 2>/dev/null
}

# Final output directory: $DIR/$DATAVER.
OUTDIR=$DIR/$DATAVER
# -p creates missing parents and tolerates a directory kept from an earlier
# run; stop here if it still cannot be created, since every later step
# redirects into it.
mkdir -p -- "$OUTDIR" || {
  printf 'error: cannot create output directory %s\n' "$OUTDIR" >&2
  exit 1
}

# echo "[training set] Concat unsupervised data with summarization data ..."
# cat $DIR/en/train.en.spm.doc $DIR/fr/train.fr.spm.doc $lmDIR/es.doc.spm.noise $lmDIR/de.doc.spm.noise $lmDIR/zh.doc.spm.noise > $OUTDIR/train.noshuffle.spm.doc
# cat $DIR/en/train.en.spm.sum $DIR/fr/train.fr.spm.sum $lmDIR/es.doc.spm.sample $lmDIR/de.doc.spm.sample $lmDIR/zh.doc.spm.sample > $OUTDIR/train.noshuffle.spm.sum

# Abort on the first failing step so a broken intermediate file is never
# shuffled into the final outputs. On failure the scratch directory is left
# in place for inspection.
set -e

echo "[training set] Concat unsupervised data ..."
cat -- "$lmDIR/es.doc.spm.noise" "$lmDIR/de.doc.spm.noise" \
  "$lmDIR/zh.doc.spm.noise" > "$TMPDIR/un.spm.noise"
cat -- "$lmDIR/es.doc.spm.sample" "$lmDIR/de.doc.spm.sample" \
  "$lmDIR/zh.doc.spm.sample" > "$TMPDIR/un.spm.sample"

echo "[training set] Concat summarization data ..."
cat -- "$DIR/en/train.en.spm.doc" "$DIR/fr/train.fr.spm.doc" \
  > "$TMPDIR/summ.spm.doc"
cat -- "$DIR/en/train.en.spm.sum" "$DIR/fr/train.fr.spm.sum" \
  > "$TMPDIR/summ.spm.sum"

# Join each source/target pair into one tab-separated line so that the
# sampling step below handles both sides of a pair together.
paste -d '\t' "$TMPDIR/un.spm.noise" "$TMPDIR/un.spm.sample" \
  > "$TMPDIR/un.spm.joint"
paste -d '\t' "$TMPDIR/summ.spm.doc" "$TMPDIR/summ.spm.sum" \
  > "$TMPDIR/summ.spm.joint"

# NOTE(review): the helper path is relative to the current working directory,
# not to this script's location -- run the script from the directory it
# expects.
python3 ../sampleByBatch.py -m sampleByBatch \
  -i1 "$TMPDIR/un.spm.joint" \
  -i2 "$TMPDIR/summ.spm.joint" \
  -o "$TMPDIR/train.noshuffle.spm.joint"

# Split the joint file back into its two tab-separated columns.
cut -f 1 < "$TMPDIR/train.noshuffle.spm.joint" \
  > "$OUTDIR/train.noshuffle.spm.doc"
cut -f 2 < "$TMPDIR/train.noshuffle.spm.joint" \
  > "$OUTDIR/train.noshuffle.spm.sum"

# Both files are shuffled with the same seed -- presumably so that doc and sum
# receive the same permutation and stay paired line by line (this relies on
# the two files having the same number of lines).
echo "shuffling"
shuf --random-source=<(get_seeded_random 66) \
  "$OUTDIR/train.noshuffle.spm.doc" > "$OUTDIR/train.spm.doc"
shuf --random-source=<(get_seeded_random 66) \
  "$OUTDIR/train.noshuffle.spm.sum" > "$OUTDIR/train.spm.sum"

# The dev set is built from the en/fr summarization data only.
echo "[dev set] Concat summarization data ..."
cat -- "$DIR/en/dev.en.spm.doc" "$DIR/fr/dev.fr.spm.doc" \
  > "$OUTDIR/dev.noshuffle.spm.doc"
cat -- "$DIR/en/dev.en.spm.sum" "$DIR/fr/dev.fr.spm.sum" \
  > "$OUTDIR/dev.noshuffle.spm.sum"

# Same-seed shuffle as for the training files. Nothing here checks that the
# dev doc and sum files have equal line counts -- verify upstream.
echo "shuffling"
shuf --random-source=<(get_seeded_random 66) \
  "$OUTDIR/dev.noshuffle.spm.doc" > "$OUTDIR/dev.spm.doc"
shuf --random-source=<(get_seeded_random 66) \
  "$OUTDIR/dev.noshuffle.spm.sum" > "$OUTDIR/dev.spm.sum"

# :? refuses to run rm -r with an empty or unset path.
rm -r -- "${TMPDIR:?}"
