#!/usr/bin/env bash
#
# Runs fairseq's generate.py with a trained en->zh checkpoint over the `train`
# subset of ./data-bin, keeps the third tab-separated field of every output
# line that starts with "H", and hands the result to decode_sp2rawText.sh to
# produce ${data_dir}/zh.out.
#
# Files written (all under ${data_dir}):
#   bpe.out_noise_0.4        raw stdout of generate.py
#   bpe.out.noise0.4.clean   extracted third field of the "H" lines
#   zh.out                   output path passed to decode_sp2rawText.sh
#
# Strict mode: abort on the first failing command (including a failure in any
# pipeline stage) so that a crashed generation run never feeds a truncated
# file into the later steps.
set -euo pipefail

readonly WORKSPACE=/workspace
readonly TOOLS_DIR="${WORKSPACE}/tools"
readonly CODES_DIR="${WORKSPACE}/codes/fairseq_master"

# Moses / subword-nmt helper locations. They are not referenced by any step in
# this script; kept so the variable set stays the same as before.
readonly SCRIPTS="${TOOLS_DIR}/mosesdecoder-master/scripts"
readonly TOKENIZER="${SCRIPTS}/tokenizer/tokenizer.perl"
readonly DETOKENIZER="${SCRIPTS}/tokenizer/detokenizer.perl"
readonly NORM_PUNC="${SCRIPTS}/tokenizer/normalize-punctuation.perl"
readonly REM_NON_PRINT_CHAR="${SCRIPTS}/tokenizer/remove-non-printing-char.perl"
readonly BPEROOT="${TOOLS_DIR}/subword-nmt"

data_dir=.

# NOTE(review): the checkpoint directory name says "noise_0.6" while the
# output files below are labelled "0.4" -- confirm which one is intended.
model_path=/workspace/models/opus_predict_enzh2zh_6_6_0.001_10_noise_0.6_del_replace_insert/checkpoint_last.pt

source_lang='en'
target_lang='zh'
bpe_codes="../${source_lang}-${target_lang}.codes.model"

# Intermediate artefacts, named once so every step agrees on the paths.
raw_out="${data_dir}/bpe.out_noise_0.4"
clean_out="${data_dir}/bpe.out.noise0.4.clean"
final_out="${data_dir}/zh.out"

export CUDA_VISIBLE_DEVICES='1'

# Step 1: run generation and capture its stdout.
python "${CODES_DIR}/generate.py" "${data_dir}/data-bin" \
  --task multilingual_translation \
  --gen-subset train \
  --source-lang "${source_lang}" \
  --target-lang "${target_lang}" \
  --lang-pairs "${source_lang}-${target_lang}" \
  --path "${model_path}" \
  --beam 4 \
  --batch-size 128 \
  > "${raw_out}"

# Step 2: keep field 3 of the lines beginning with "H" (presumably the
# hypothesis text of fairseq's "H-<id>" records -- TODO confirm).
# NOTE(review): lines are kept in the order generate.py printed them; if the
# result must line up with the original corpus order, sort by the id first.
# grep exits non-zero when nothing matches, which is treated as a failure
# because an output without any "H" line means generation produced nothing.
if ! grep '^H' "${raw_out}" | cut -f3 > "${clean_out}"; then
  printf 'error: no "H" lines could be extracted from %s\n' "${raw_out}" >&2
  exit 1
fi

# Step 3: decode with the SentencePiece helper. Arguments, as passed here:
# model file, input file, output file (presumed meaning -- verify in helper).
sh "${TOOLS_DIR}/sentencepiece-tool/decode_sp2rawText.sh" \
  "${bpe_codes}" "${clean_out}" "${final_out}"
