% Joshi & He (ACL 2022, long paper). The url field uses the canonical
% ACL Anthology address for paper ID 2022.acl-long.256, not a preview-branch link.
@inproceedings{joshi-he-2022-investigation,
  title     = {An Investigation of the (In)effectiveness of Counterfactually Augmented Data},
  author    = {Joshi, Nitish and
               He, He},
  editor    = {Muresan, Smaranda and
               Nakov, Preslav and
               Villavicencio, Aline},
  booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = may,
  year      = {2022},
  address   = {Dublin, Ireland},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.acl-long.256/},
  doi       = {10.18653/v1/2022.acl-long.256},
  pages     = {3668--3681},
  abstract  = {While pretrained language models achieve excellent performance on natural language understanding benchmarks, they tend to rely on spurious correlations and generalize poorly to out-of-distribution (OOD) data. Recent work has explored using counterfactually-augmented data (CAD){---}data generated by minimally perturbing examples to flip the ground-truth label{---}to identify robust features that are invariant under distribution shift. However, empirical results using CAD during training for OOD generalization have been mixed. To explain this discrepancy, through a toy theoretical example and empirical analysis on two crowdsourced CAD datasets, we show that: (a) while features perturbed in CAD are indeed robust features, it may prevent the model from learning unperturbed robust features; and (b) CAD may exacerbate existing spurious correlations in the data. Our results thus show that the lack of perturbation diversity limits CAD's effectiveness on OOD generalization, calling for innovative crowdsourcing procedures to elicit diverse perturbation of examples.},
}
Markdown (Informal)
[An Investigation of the (In)effectiveness of Counterfactually Augmented Data](https://aclanthology.org/2022.acl-long.256/) (Joshi & He, ACL 2022)
ACL