@inproceedings{sun-etal-2022-alleviating,
title = "Alleviating the Inequality of Attention Heads for Neural Machine Translation",
author = "Sun, Zewei and
Huang, Shujian and
Dai, Xinyu and
Chen, Jiajun",
editor = "Calzolari, Nicoletta and
Huang, Chu-Ren and
Kim, Hansaem and
Pustejovsky, James and
Wanner, Leo and
Choi, Key-Sun and
Ryu, Pum-Mo and
Chen, Hsin-Hsi and
Donatelli, Lucia and
Ji, Heng and
Kurohashi, Sadao and
Paggio, Patrizia and
Xue, Nianwen and
Kim, Seokhwan and
Hahm, Younggyun and
He, Zhong and
Lee, Tony Kyungil and
Santus, Enrico and
Bond, Francis and
Na, Seung-Hoon",
booktitle = "Proceedings of the 29th International Conference on Computational Linguistics",
month = oct,
year = "2022",
address = "Gyeongju, Republic of Korea",
publisher = "International Committee on Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2022.coling-1.466/",
pages = "5246--5250",
abstract = "Recent studies show that the attention heads in Transformer are not equal. We relate this phenomenon to the imbalance training of multi-head attention and the model dependence on specific heads. To tackle this problem, we propose a simple masking method: HeadMask, in two specific ways. Experiments show that translation improvements are achieved on multiple language pairs. Subsequent empirical analyses also support our assumption and confirm the effectiveness of the method."
}
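The abstract only names the idea, so here is a minimal sketch of the generic mechanism it describes: zeroing out some attention heads during training so the remaining heads are forced to carry the signal. This is an illustration under assumptions, not the authors' code; the function name `head_mask`, the tensor layout, and the purely random choice of heads are all assumed, and the paper's two specific HeadMask variants are not reproduced here.

```python
# Minimal sketch (not the authors' code): randomly zero out a few attention
# heads during training so no single head can monopolise the gradient signal.
# Tensor layout and function name are assumptions made for illustration.
import torch

def head_mask(head_outputs: torch.Tensor, n_masked: int) -> torch.Tensor:
    """Zero out `n_masked` randomly chosen heads.

    head_outputs: assumed shape (batch, n_heads, seq_len, d_head),
    i.e. per-head outputs before the final output projection.
    """
    n_heads = head_outputs.shape[1]
    # Pick heads to silence; resampled on every call (i.e. every batch).
    silenced = torch.randperm(n_heads, device=head_outputs.device)[:n_masked]
    mask = torch.ones(n_heads, device=head_outputs.device)
    mask[silenced] = 0.0
    # Broadcast the per-head mask over batch, positions, and channels.
    return head_outputs * mask.view(1, n_heads, 1, 1)
```

In a training loop, a sketch like this would be applied to each layer's per-head outputs before the output projection, with the masked set resampled every batch; at inference time no heads would be masked.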
Markdown (Informal)
[Alleviating the Inequality of Attention Heads for Neural Machine Translation](https://aclanthology.org/2022.coling-1.466/) (Sun et al., COLING 2022)