# --- Path configuration -----------------------------------------------------
# Root of the workspace on this machine (to be filled in per environment).
rootdir='/home/tiger'

# Mounted data drive; the value keeps its trailing slash.
drive_dir='/mnt/bd/lab-wxz/'

# Locations derived from the two roots above.
un_dir="${drive_dir}/clt/UN/dev/"
working_dir="${rootdir}/crossLingual"
graph_dir="${rootdir}/graphs"

# Left unquoted on purpose so that the leading tilde is expanded by the shell.
component_dir=~/component
# Create the component and graph directories when they are not there yet.
[ -d "${component_dir}" ] || mkdir -p "${component_dir}"

[ -d "${graph_dir}" ] || mkdir -p "${graph_dir}"

# Maps the short language code used in file names to the language tag that is
# handed to the dump scripts.
declare -A LANG_TAGS=(
    [en]="en_XX"
    [fr]="fr_XX"
    [zh]="zh_CN"
)

# The dump_*.sh scripts and visualizeEmb.py are invoked below by relative
# path, so everything must run from the working directory. Stop right away if
# it cannot be entered instead of running the remaining steps elsewhere.
cd "${working_dir}" || {
    echo "Cannot change directory to ${working_dir}" >&2
    exit 1
}

# Step 1: dump the mbart encoder representation for each UN language file.
# skip_preprocess is "false" for the first language only and "true" for the
# remaining ones (presumably the preprocessing only has to happen once --
# confirm against dump_text_emb_mbart.sh).
skip_preprocess="false"
for lg in en zh fr; do
    TAG="${LANG_TAGS[$lg]}"
    bash dump_text_emb_mbart.sh \
        mspm4_xgiga \
        "${TAG}" \
        "${un_dir}/UNv1.0.6way.${lg}.first_5000.txt" \
        "UN_${lg}" \
        "${skip_preprocess}" \
        "{'arch': 'mbart_summ_abs_large', 'doc_state': 'encoder', 'encoder_version': 'v2'}"
    skip_preprocess="true"
done
echo "Dump mbart encoder is finished"

# # Dump mbart projected representation
# Download the component files into component_dir (the directory this script
# defines and creates near the top). The HDFS glob is quoted so that it is
# always passed through to `hdfs dfs` literally and never expanded by the
# local shell.
hdfs dfs -get 'hdfs://haruna/home/byte_arnold_hl_mlnlc/user/wuxianze.0/Datasets/cc100/components/*' "${component_dir}/"

# Fetch the base component config as ~/layer12.json.
# NOTE(review): the projection dump later passes
# 'component_config': '/home/tiger/layer12.json', so this relies on ~
# resolving to /home/tiger -- confirm when running under another account.
hdfs dfs -get hdfs://haruna/home/byte_arnold_hl_mlnlc/user/wuxianze.0/Workspace/Multilingual/ComponentConfig/baseConfig.json ~/layer12.json

# Dump the mbart projected representation ('doc_state': 'proj') for each UN
# language file, using the component config at /home/tiger/layer12.json.
# skip_preprocess is "false" for the first language only.
skip_preprocess="false"
for lg in en zh fr; do
    TAG="${LANG_TAGS[$lg]}"
    bash dump_text_emb_mbart.sh \
        mspm4_xgiga \
        "${TAG}" \
        "${un_dir}/UNv1.0.6way.${lg}.first_5000.txt" \
        "UN_${lg}_proj" \
        "${skip_preprocess}" \
        "{'arch': 'mbart_summ_abs_large', 'doc_state': 'proj', 'encoder_version': 'v1', 'component_config': '/home/tiger/layer12.json'}"
    skip_preprocess="true"
done
echo "Dump mbart projection is finished"

# Dump the "fused" embeddings with dump_text_emb_model_TA.sh for each UN
# language file. The same checkpoint location is passed twice (5th and last
# argument); what each position means is defined by dump_text_emb_model_TA.sh.
ta_checkpoint="hdfs://haruna/home/byte_arnold_hl_mlnlc/user/wuxianze.0/Workspace/Multilingual/xgiga/ZhEnFr_Unsup_infillNoise_r0.35_maxlen256_right_fix/checkpoints/mbartV2_cc100_adapterV2_v1EncComR_projLN_2way_encSharedLA_decSharedLA_trainLA_ls0"

skip_preprocess="false"
for lg in en zh fr; do
    TAG="${LANG_TAGS[$lg]}"
    bash dump_text_emb_model_TA.sh \
        "${TAG}" \
        "${un_dir}/UNv1.0.6way.${lg}.first_5000.txt" \
        "UN_${lg}_fused" \
        "${skip_preprocess}" \
        "${ta_checkpoint}" \
        en_final_ln.json \
        en_pre_ln.json \
        layer12 \
        "${ta_checkpoint}"
    skip_preprocess="true"
done

# NOTE(review): r is not referenced anywhere in the visible part of this
# script -- confirm whether anything still uses it.
r=6

# Visualize the mbart encoder embeddings as document pairs: English against
# French, then English against Chinese. Plots are written to graph_dir.
for pair_lg in fr zh; do
    python3 visualizeEmb.py -m visualizeDocEmbPair \
        --system-files \
        "${rootdir}/xgiga_dumpEmb_encoder/embedding/UN_en/document_embedding.jsonl" \
        "${rootdir}/xgiga_dumpEmb_encoder/embedding/UN_${pair_lg}/document_embedding.jsonl" \
        -n "mbart_en${pair_lg}" \
        -o "${graph_dir}"
done

# Visualize the projected embeddings as document pairs: English against
# French, then English against Chinese. Plots are written to graph_dir.
for pair_lg in fr zh; do
    python3 visualizeEmb.py -m visualizeDocEmbPair \
        --system-files \
        "${rootdir}/xgiga_dumpEmb_encoder/embedding/UN_en_proj/document_embedding.jsonl" \
        "${rootdir}/xgiga_dumpEmb_encoder/embedding/UN_${pair_lg}_proj/document_embedding.jsonl" \
        -n "mbart_en${pair_lg}_proj" \
        -o "${graph_dir}"
done
echo "Visualization is finished"

# Visualize the fused embeddings as document pairs: English against French,
# then English against Chinese. Plots are written to graph_dir.
# Both pairs read the UN_<lang>_fused directories, matching the names given to
# dump_text_emb_model_TA.sh earlier in this script; building the paths in one
# loop keeps the en-zh pair from drifting to a different suffix.
for pair_lg in fr zh; do
    python3 visualizeEmb.py -m visualizeDocEmbPair \
        --system-files \
        "${rootdir}/xgiga_dumpEmb_model_TA/embedding/UN_en_fused/document_embedding.jsonl" \
        "${rootdir}/xgiga_dumpEmb_model_TA/embedding/UN_${pair_lg}_fused/document_embedding.jsonl" \
        -n "mbart_en${pair_lg}_fused" \
        -o "${graph_dir}"
done
echo "Visualization is finished"