@inproceedings{porada-etal-2022-pre,
title = "Does Pre-training Induce Systematic Inference? How Masked Language Models Acquire Commonsense Knowledge",
author = "Porada, Ian and
Sordoni, Alessandro and
Cheung, Jackie Chi Kit",
editor = "Carpuat, Marine and
de Marneffe, Marie-Catherine and
Meza Ruiz, Ivan Vladimir",
booktitle = "Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies",
month = jul,
year = "2022",
address = "Seattle, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.naacl-main.337/",
doi = "10.18653/v1/2022.naacl-main.337",
pages = "4550--4557",
abstract = "Transformer models pre-trained with a masked-language-modeling objective (e.g., BERT) encode commonsense knowledge as evidenced by behavioral probes; however, the extent to which this knowledge is acquired by systematic inference over the semantics of the pre-training corpora is an open question. To answer this question, we selectively inject verbalized knowledge into the pre-training minibatches of BERT and evaluate how well the model generalizes to supported inferences after pre-training on the injected knowledge. We find generalization does not improve over the course of pre-training BERT from scratch, suggesting that commonsense knowledge is acquired from surface-level, co-occurrence patterns rather than induced, systematic reasoning."
}
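As a rough illustration of the knowledge-injection setup described in the abstract (a minimal sketch, not the authors' released code), the snippet below appends one verbalized fact sentence to a masked-language-modeling minibatch using the HuggingFace `transformers` collator. The model name, example sentences, and the fact string are hypothetical placeholders.

```python
# Illustrative sketch only: inject a verbalized commonsense fact into an
# MLM pre-training minibatch so it is masked and predicted like any other
# pre-training sequence. Assumes the HuggingFace `transformers` library.
from transformers import BertTokenizerFast, DataCollatorForLanguageModeling

tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")
collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=0.15)

# Placeholder corpus sentences and an injected verbalized fact.
corpus_batch = [
    "The chef sliced the onions before adding them to the pan.",
    "She parked the car and walked into the office.",
]
verbalized_fact = "A person is capable of opening a door."

# Add the fact as an extra sequence in the minibatch.
texts = corpus_batch + [verbalized_fact]
encodings = tokenizer(texts, truncation=True, padding=True)
features = [
    {"input_ids": ids, "attention_mask": mask}
    for ids, mask in zip(encodings["input_ids"], encodings["attention_mask"])
]

# The collator applies random [MASK] corruption and builds MLM labels.
batch = collator(features)
print(batch["input_ids"].shape, batch["labels"].shape)
```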