@inproceedings{wenjing-etal-2021-improving,
  title         = {Improving Low-Resource Named Entity Recognition via Label-Aware Data Augmentation and Curriculum Denoising},
  author        = {Zhu, Wenjing and
                   Liu, Jian and
                   Xu, Jinan and
                   Chen, Yufeng and
                   Zhang, Yujie},
  editor        = {Li, Sheng and
                   Sun, Maosong and
                   Liu, Yang and
                   Wu, Hua and
                   Liu, Kang and
                   Che, Wanxiang and
                   He, Shizhu and
                   Rao, Gaoqi},
  booktitle     = {Proceedings of the 20th Chinese National Conference on Computational Linguistics},
  month         = aug,
  year          = {2021},
  address       = {Huhhot, China},
  publisher     = {Chinese Information Processing Society of China},
  url           = {https://aclanthology.org/2021.ccl-1.101/},
  pages         = {1131--1142},
  language      = {eng},
  abstract      = {Deep neural networks have achieved state-of-the-art performances on named entity recognition (NER) with sufficient training data, while they perform poorly in low-resource scenarios due to data scarcity. To solve this problem, we propose a novel data augmentation method based on a pre-trained language model (PLM) and a curriculum learning strategy. Concretely, we use the PLM to generate diverse training instances through predicting different masked words, and design a task-specific curriculum learning strategy to alleviate the influence of noises. We evaluate the effectiveness of our approach on three datasets: CoNLL-2003, OntoNotes5.0, and MaScip, of which the first two are simulated low-resource scenarios and the last one is a real low-resource dataset in the material science domain. Experimental results show that our method consistently outperforms the baseline model. Specifically, our method achieves an absolute improvement of 3.46{\%} F1 score on the 1{\%} CoNLL-2003, 2.58{\%} on the 1{\%} OntoNotes5.0, and 0.99{\%} on the full of MaScip.},
  internal-note = {Review fixes: author names flipped to surname-first (Zhu, Liu, Xu, Chen, Zhang -- Chinese surnames, matching the editor-list convention); preview.aclanthology.org URL replaced with canonical aclanthology.org link; extraction artifacts in abstract (missing spaces/commas) repaired. Citation key left unchanged so existing \cite commands keep working.},
}
Markdown (Informal)
[Improving Low-Resource Named Entity Recognition via Label-Aware Data Augmentation and Curriculum Denoising](https://aclanthology.org/2021.ccl-1.101/) (Zhu et al., CCL 2021)
ACL