@comment{ACL Anthology record for a Findings of ACL 2023 paper. The url field is the canonical aclanthology.org landing page whose ID matches the DOI suffix. The abstract is kept verbatim from the source record.}
@inproceedings{aggarwal-etal-2023-ecg,
  title     = {{ECG}-{QALM}: Entity-Controlled Synthetic Text Generation using Contextual {Q\&A} for {NER}},
  author    = {Aggarwal, Karan and
               Jin, Henry and
               Ahmad, Aitzaz},
  editor    = {Rogers, Anna and
               Boyd-Graber, Jordan and
               Okazaki, Naoaki},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2023},
  month     = jul,
  year      = {2023},
  address   = {Toronto, Canada},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.findings-acl.349/},
  doi       = {10.18653/v1/2023.findings-acl.349},
  pages     = {5649--5660},
  abstract  = {Named Entity Recognition (NER) state-of-the-art methods requires high-quality labeled datasets. Issues such as scarcity of labeled data, under-representation of entities, and privacy concerns with using sensitive data for training, can be significant barriers. Generating synthetic data to train models is a promising solution to mitigate these problems. We propose ECG-QALM, a contextual question and answering approach using pre-trained language models to synthetically generate entity-controlled text. Generated text is then used to augment small labeled datasets for downstream NER tasks. We evaluate our method on two publicly available datasets. We find ECG-QALM is capable of producing full text samples with desired entities appearing in a controllable way, while retaining sentence coherence closest to the real world data. Evaluations on NER tasks show significant improvements (75{\%} - 140{\%}) in low-labeled data regimes.}
}
Markdown (Informal)
[ECG-QALM: Entity-Controlled Synthetic Text Generation using Contextual Q&A for NER](https://aclanthology.org/2023.findings-acl.349/) (Aggarwal et al., Findings 2023)
ACL