@inproceedings{yang-etal-2021-simple,
    title     = {A Simple and Effective Method To Eliminate the Self Language Bias in Multilingual Representations},
    author    = {Yang, Ziyi and
                 Yang, Yinfei and
                 Cer, Daniel and
                 Darve, Eric},
    editor    = {Moens, Marie-Francine and
                 Huang, Xuanjing and
                 Specia, Lucia and
                 Yih, Scott Wen-tau},
    booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing},
    month     = nov,
    year      = {2021},
    address   = {Online and Punta Cana, Dominican Republic},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2021.emnlp-main.470/},
    doi       = {10.18653/v1/2021.emnlp-main.470},
    pages     = {5825--5832},
    abstract  = {Language agnostic and semantic-language information isolation is an emerging research direction for multilingual representations models. We explore this problem from a novel angle of geometric algebra and semantic space. A simple but highly effective method {\textquotedblleft}Language Information Removal (LIR){\textquotedblright} factors out language identity information from semantic related components in multilingual representations pre-trained on multi-monolingual data. A post-training and model-agnostic method, LIR only uses simple linear operations, e.g. matrix factorization and orthogonal projection. LIR reveals that for weak-alignment multilingual systems, the principal components of semantic spaces primarily encodes language identity information. We first evaluate the LIR on a cross-lingual question answer retrieval task (LAReQA), which requires the strong alignment for the multilingual embedding space. Experiment shows that LIR is highly effectively on this task, yielding almost 100{\%} relative improvement in MAP for weak-alignment models. We then evaluate the LIR on Amazon Reviews and XEVAL dataset, with the observation that removing language information is able to improve the cross-lingual transfer performance.},
}
Markdown (Informal)
[A Simple and Effective Method To Eliminate the Self Language Bias in Multilingual Representations](https://aclanthology.org/2021.emnlp-main.470/) (Yang et al., EMNLP 2021)
ACL