@inproceedings{suntwal-etal-2019-importance,
  title     = {On the Importance of Delexicalization for Fact Verification},
  author    = {Suntwal, Sandeep and
               Paul, Mithun and
               Sharp, Rebecca and
               Surdeanu, Mihai},
  editor    = {Inui, Kentaro and
               Jiang, Jing and
               Ng, Vincent and
               Wan, Xiaojun},
  booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)},
  month     = nov,
  year      = {2019},
  address   = {Hong Kong, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/D19-1340/},
  doi       = {10.18653/v1/D19-1340},
  pages     = {3413--3418},
  abstract  = {While neural networks produce state-of-the-art performance in many NLP tasks, they generally learn from lexical information, which may transfer poorly between domains. Here, we investigate the importance that a model assigns to various aspects of data while learning and making predictions, specifically, in a recognizing textual entailment (RTE) task. By inspecting the attention weights assigned by the model, we confirm that most of the weights are assigned to noun phrases. To mitigate this dependence on lexicalized information, we experiment with two strategies of masking. First, we replace named entities with their corresponding semantic tags along with a unique identifier to indicate lexical overlap between claim and evidence. Second, we similarly replace other word classes in the sentence (nouns, verbs, adjectives, and adverbs) with their super sense tags (Ciaramita and Johnson, 2003). Our results show that, while performance on the in-domain dataset remains on par with that of the model trained on fully lexicalized data, it improves considerably when tested out of domain. For example, the performance of a state-of-the-art RTE model trained on the masked Fake News Challenge (Pomerleau and Rao, 2017) data and evaluated on Fact Extraction and Verification (Thorne et al., 2018) data improved by over 10{\%} in accuracy score compared to the fully lexicalized model.},
}
Markdown (Informal)
[On the Importance of Delexicalization for Fact Verification](https://aclanthology.org/D19-1340/) (Suntwal et al., EMNLP-IJCNLP 2019)
ACL
- Sandeep Suntwal, Mithun Paul, Rebecca Sharp, and Mihai Surdeanu. 2019. On the Importance of Delexicalization for Fact Verification. In Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), pages 3413–3418, Hong Kong, China. Association for Computational Linguistics.