@inproceedings{calderon-etal-2022-docogen,
    title     = "{DoCoGen}: {Domain} Counterfactual Generation for Low Resource Domain Adaptation",
    author    = "Calderon, Nitay and
                 Ben-David, Eyal and
                 Feder, Amir and
                 Reichart, Roi",
    editor    = "Muresan, Smaranda and
                 Nakov, Preslav and
                 Villavicencio, Aline",
    booktitle = "Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
    month     = may,
    year      = "2022",
    address   = "Dublin, Ireland",
    publisher = "Association for Computational Linguistics",
    url       = "https://aclanthology.org/2022.acl-long.533/",
    doi       = "10.18653/v1/2022.acl-long.533",
    pages     = "7727--7746",
    abstract  = "Natural language processing (NLP) algorithms have become very successful, but they still struggle when applied to out-of-distribution examples. In this paper we propose a controllable generation approach in order to deal with this domain adaptation (DA) challenge. Given an input text example, our DoCoGen algorithm generates a domain-counterfactual textual example (D-con) - that is similar to the original in all aspects, including the task label, but its domain is changed to a desired one. Importantly, DoCoGen is trained using only unlabeled examples from multiple domains - no NLP task labels or parallel pairs of textual examples and their domain-counterfactuals are required. We show that DoCoGen can generate coherent counterfactuals consisting of multiple sentences. We use the D-cons generated by DoCoGen to augment a sentiment classifier and a multi-label intent classifier in 20 and 78 DA setups, respectively, where source-domain labeled data is scarce. Our model outperforms strong baselines and improves the accuracy of a state-of-the-art unsupervised DA algorithm.",
}
Markdown (Informal)
[DoCoGen: Domain Counterfactual Generation for Low Resource Domain Adaptation](https://aclanthology.org/2022.acl-long.533/) (Calderon et al., ACL 2022)
ACL