@inproceedings{bhargava-etal-2021-generalization,
title = "Generalization in {NLI}: Ways (Not) To Go Beyond Simple Heuristics",
author = "Bhargava, Prajjwal and
Drozd, Aleksandr and
Rogers, Anna",
editor = "Sedoc, Jo{\~a}o and
Rogers, Anna and
Rumshisky, Anna and
Tafreshi, Shabnam",
booktitle = "Proceedings of the Second Workshop on Insights from Negative Results in NLP",
month = nov,
year = "2021",
address = "Online and Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2021.insights-1.18/",
doi = "10.18653/v1/2021.insights-1.18",
pages = "125--135",
abstract = "Much of recent progress in NLU was shown to be due to models' learning dataset-specific heuristics. We conduct a case study of generalization in NLI (from MNLI to the adversarially constructed HANS dataset) in a range of BERT-based architectures (adapters, Siamese Transformers, HEX debiasing), as well as with subsampling the data and increasing the model size. We report 2 successful and 3 unsuccessful strategies, all providing insights into how Transformer-based models learn to generalize."
}
Markdown (Informal)
[Generalization in NLI: Ways (Not) To Go Beyond Simple Heuristics](https://aclanthology.org/2021.insights-1.18/) (Bhargava et al., insights 2021)
ACL
Prajjwal Bhargava, Aleksandr Drozd, and Anna Rogers. 2021. [Generalization in NLI: Ways (Not) To Go Beyond Simple Heuristics](https://aclanthology.org/2021.insights-1.18/). In *Proceedings of the Second Workshop on Insights from Negative Results in NLP*, pages 125–135, Online and Punta Cana, Dominican Republic. Association for Computational Linguistics.
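
For readers who want to reproduce the abstract's basic setup, here is a minimal sketch of the MNLI-to-HANS generalization check (fine-tune on MNLI, evaluate zero-shot on HANS), assuming the HuggingFace `transformers` and `datasets` libraries. The checkpoint name and the label collapsing are illustrative assumptions, not the authors' code.

```python
# Minimal sketch: evaluate an MNLI-fine-tuned BERT on the HANS validation set.
# The checkpoint is a hypothetical stand-in, not the model used in the paper.
import torch
from datasets import load_dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer

name = "textattack/bert-base-uncased-MNLI"  # assumed MNLI checkpoint
tok = AutoTokenizer.from_pretrained(name)
model = AutoModelForSequenceClassification.from_pretrained(name).eval()

# HANS labels: 0 = entailment, 1 = non-entailment.
hans = load_dataset("hans", split="validation")

# MNLI checkpoints order their 3 labels differently; find "entailment" in the
# config, falling back to index 0 if the config uses generic label names.
ent_id = next((i for i, lab in model.config.id2label.items()
               if "entail" in lab.lower()), 0)

correct = 0
for ex in hans:
    enc = tok(ex["premise"], ex["hypothesis"],
              return_tensors="pt", truncation=True)
    with torch.no_grad():
        pred = model(**enc).logits.argmax(-1).item()
    # Collapse MNLI's neutral/contradiction into HANS's non-entailment class.
    correct += int((0 if pred == ent_id else 1) == ex["label"])

print(f"HANS accuracy: {correct / len(hans):.3f}")
```

Models that rely on MNLI-specific heuristics typically score near chance (or below) on HANS's non-entailment examples, which is what makes this a useful generalization probe.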