% Verma, Bergler and Tahaei (BioNLP workshop, 2023): workshop paper comparing
% and combining the SEQ, SeqCRF and SpanPred NER approaches on four biomedical
% datasets. The url field holds the canonical ACL Anthology permalink; the doi
% is stored bare (no resolver prefix).
@inproceedings{verma-etal-2023-comparing,
  title     = {Comparing and combining some popular {NER} approaches on Biomedical tasks},
  author    = {Verma, Harsh and
               Bergler, Sabine and
               Tahaei, Narjesossadat},
  editor    = {Demner-Fushman, Dina and
               Ananiadou, Sophia and
               Cohen, Kevin},
  booktitle = {The 22nd Workshop on Biomedical Natural Language Processing and BioNLP Shared Tasks},
  month     = jul,
  year      = {2023},
  address   = {Toronto, Canada},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.bionlp-1.24/},
  doi       = {10.18653/v1/2023.bionlp-1.24},
  pages     = {273--279},
  abstract  = {We compare three simple and popular approaches for NER: 1) SEQ (sequence labeling with a linear token classifier) 2) SeqCRF (sequence labeling with Conditional Random Fields), and 3) SpanPred (span prediction with boundary token embeddings). We compare the approaches on 4 biomedical NER tasks: GENIA, NCBI-Disease, LivingNER (Spanish), and SocialDisNER (Spanish). The SpanPred model demonstrates state-of-the-art performance on LivingNER and SocialDisNER, improving F1 by 1.3 and 0.6 F1 respectively. The SeqCRF model also demonstrates state-of-the-art performance on LivingNER and SocialDisNER, improving F1 by 0.2 F1 and 0.7 respectively. The SEQ model is competitive with the state-of-the-art on LivingNER dataset. We explore some simple ways of combining the three approaches. We find that majority voting consistently gives high precision and high F1 across all 4 datasets. Lastly, we implement a system that learns to combine SEQ's and SpanPred's predictions, generating systems that give high recall and high F1 across all 4 datasets. On the GENIA dataset, we find that our learned combiner system significantly boosts F1(+1.2) and recall(+2.1) over the systems being combined.},
}
Markdown (Informal)
[Comparing and combining some popular NER approaches on Biomedical tasks](https://aclanthology.org/2023.bionlp-1.24/) (Verma et al., BioNLP 2023)
ACL