% ACL Anthology record W17-5807: DDDSM-2017 workshop paper, pages 47--51.
% The abstract is kept as published, except that line-break hyphenation
% artifacts from text extraction (e.g. "an-notating") have been rejoined.
% The url field uses the canonical Anthology address, not a preview build.
@inproceedings{kim-etal-2017-method,
  title     = {A Method to Generate a Machine-Labeled Data for Biomedical Named Entity Recognition with Various Sub-Domains},
  author    = {Kim, Juae and
               Kwon, Sunjae and
               Ko, Youngjoong and
               Seo, Jungyun},
  editor    = {Jonnagaddala, Jitendra and
               Dai, Hong-Jie and
               Chang, Yung-Chun},
  booktitle = {Proceedings of the International Workshop on Digital Disease Detection using Social Media 2017 ({DDDSM}-2017)},
  month     = nov,
  year      = {2017},
  address   = {Taipei, Taiwan},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/W17-5807/},
  pages     = {47--51},
  abstract  = {Biomedical Named Entity (NE) recognition is a core technique for various works in the biomedical domain. In previous studies, using machine learning algorithm shows better performance than dictionary-based and rule-based approaches because there are too many terminological variations of biomedical NEs and new biomedical NEs are constantly generated. To achieve the high performance with a machine-learning algorithm, good-quality corpora are required. However, it is difficult to obtain the good-quality corpora because annotating a biomedical corpus for machine-learning is extremely time-consuming and costly. In addition, most previous corpora are insufficient for high-level tasks because they cannot cover various domains. Therefore, we propose a method for generating a large amount of machine-labeled data that covers various domains. To generate a large amount of machine-labeled data, firstly we generate an initial machine-labeled data by using a chunker and MetaMap. The chunker is developed to extract only biomedical NEs with manually annotated data. MetaMap is used to annotate the category of biomedical NE. Then we apply the self-training approach to bootstrap the performance of initial machine-labeled data. In our experiments, the biomedical NE recognition system that is trained with our proposed machine-labeled data achieves much high performance. As a result, our system outperforms biomedical NE recognition system that using MetaMap only with 26.03{\%}p improvements on F1-score.},
}
Markdown (Informal)
[A Method to Generate a Machine-Labeled Data for Biomedical Named Entity Recognition with Various Sub-Domains](https://aclanthology.org/W17-5807/) (Kim et al., 2017)
ACL