% ACL Anthology record 2025.ijcnlp-short.15 (Kamath and Vajjala, IJCNLP-AACL 2025).
% The url field uses the permanent aclanthology.org address; the exported
% preview.aclanthology.org/ingest-... link is a temporary staging build.
@inproceedings{kamath-vajjala-2025-synthetic,
  title     = {Does Synthetic Data Help Named Entity Recognition for Low-Resource Languages?},
  author    = {Kamath, Gaurav and
               Vajjala, Sowmya},
  editor    = {Inui, Kentaro and
               Sakti, Sakriani and
               Wang, Haofen and
               Wong, Derek F. and
               Bhattacharyya, Pushpak and
               Banerjee, Biplab and
               Ekbal, Asif and
               Chakraborty, Tanmoy and
               Singh, Dhirendra Pratap},
  booktitle = {Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics},
  month     = dec,
  year      = {2025},
  address   = {Mumbai, India},
  publisher = {The Asian Federation of Natural Language Processing and The Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.ijcnlp-short.15/},
  pages     = {159--167},
  isbn      = {979-8-89176-299-2},
  abstract  = {We explore whether synthetic datasets generated by large language models using a few high quality seed samples are useful for low-resource named entity recognition, considering 11 languages from three language families. Our results suggest that synthetic data created with such seed data is a reasonable choice when there is no available labeled data, and is better than using entirely automatically labeled data. However, a small amount of high-quality data, coupled with cross-lingual transfer from a related language, always offers better performance. Data and code available at: https://github.com/grvkamath/low-resource-syn-ner.},
}
Markdown (Informal)
[Does Synthetic Data Help Named Entity Recognition for Low-Resource Languages?](https://aclanthology.org/2025.ijcnlp-short.15/) (Kamath & Vajjala, IJCNLP-AACL 2025)
ACL
- Gaurav Kamath and Sowmya Vajjala. 2025. Does Synthetic Data Help Named Entity Recognition for Low-Resource Languages? In Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics, pages 159–167, Mumbai, India. The Asian Federation of Natural Language Processing and The Association for Computational Linguistics.