@inproceedings{fu-etal-2022-contextual,
title = "Contextual Representation Learning beyond Masked Language Modeling",
author = "Fu, Zhiyi and
Zhou, Wangchunshu and
Xu, Jingjing and
Zhou, Hao and
Li, Lei",
editor = "Muresan, Smaranda and
Nakov, Preslav and
Villavicencio, Aline",
booktitle = "Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2022.acl-long.193/",
doi = "10.18653/v1/2022.acl-long.193",
pages = "2701--2714",
    abstract = "Currently, masked language modeling (e.g., BERT) is the prime choice for learning contextualized representations. Given its pervasiveness, a natural question arises: how do masked language models (MLMs) learn contextual representations? In this work, we analyze the learning dynamics of MLMs and find that they adopt sampled embeddings as anchors to estimate and inject contextual semantics into representations, which limits the efficiency and effectiveness of MLMs. To address these problems, we propose TACO, a simple yet effective representation learning approach that directly models global semantics. Specifically, TACO extracts and aligns the contextual semantics hidden in contextualized representations to encourage models to attend to global semantics when generating contextualized representations. Experiments on the GLUE benchmark show that TACO achieves up to a 5x speedup and up to a 1.2-point average improvement over MLM."
}
Markdown (Informal):
[Contextual Representation Learning beyond Masked Language Modeling](https://preview.aclanthology.org/fix-sig-urls/2022.acl-long.193/) (Fu et al., ACL 2022)