@inproceedings{erdmann-etal-2019-practical,
title = "Practical, Efficient, and Customizable Active Learning for Named Entity Recognition in the Digital Humanities",
author = "Erdmann, Alexander and
Wrisley, David Joseph and
Allen, Benjamin and
Brown, Christopher and
Cohen-Bod{\'e}n{\`e}s, Sophie and
Elsner, Micha and
Feng, Yukun and
Joseph, Brian and
Joyeux-Prunel, B{\'e}atrice and
de Marneffe, Marie-Catherine",
booktitle = "Proceedings of the 2019 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)",
month = jun,
year = "2019",
address = "Minneapolis, Minnesota",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/N19-1231",
doi = "10.18653/v1/N19-1231",
pages = "2223--2234",
abstract = "Scholars in inter-disciplinary fields like the Digital Humanities are increasingly interested in semantic annotation of specialized corpora. Yet, under-resourced languages, imperfect or noisily structured data, and user-specific classification tasks make it difficult to meet their needs using off-the-shelf models. Manual annotation of large corpora from scratch, meanwhile, can be prohibitively expensive. Thus, we propose an active learning solution for named entity recognition, attempting to maximize a custom model{'}s improvement per additional unit of manual annotation. Our system robustly handles any domain or user-defined label set and requires no external resources, enabling quality named entity recognition for Humanities corpora where such resources are not available. Evaluating on typologically disparate languages and datasets, we reduce required annotation by 20-60{\%} and greatly outperform a competitive active learning baseline.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="erdmann-etal-2019-practical">
<titleInfo>
<title>Practical, Efficient, and Customizable Active Learning for Named Entity Recognition in the Digital Humanities</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Erdmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="given">Joseph</namePart>
<namePart type="family">Wrisley</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benjamin</namePart>
<namePart type="family">Allen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Brown</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sophie</namePart>
<namePart type="family">Cohen-Bodénès</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Micha</namePart>
<namePart type="family">Elsner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yukun</namePart>
<namePart type="family">Feng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Brian</namePart>
<namePart type="family">Joseph</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Béatrice</namePart>
<namePart type="family">Joyeux-Prunel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie-Catherine</namePart>
<namePart type="family">de Marneffe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-jun</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Minneapolis, Minnesota</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Scholars in inter-disciplinary fields like the Digital Humanities are increasingly interested in semantic annotation of specialized corpora. Yet, under-resourced languages, imperfect or noisily structured data, and user-specific classification tasks make it difficult to meet their needs using off-the-shelf models. Manual annotation of large corpora from scratch, meanwhile, can be prohibitively expensive. Thus, we propose an active learning solution for named entity recognition, attempting to maximize a custom model’s improvement per additional unit of manual annotation. Our system robustly handles any domain or user-defined label set and requires no external resources, enabling quality named entity recognition for Humanities corpora where such resources are not available. Evaluating on typologically disparate languages and datasets, we reduce required annotation by 20-60% and greatly outperform a competitive active learning baseline.</abstract>
<identifier type="citekey">erdmann-etal-2019-practical</identifier>
<identifier type="doi">10.18653/v1/N19-1231</identifier>
<location>
<url>https://aclanthology.org/N19-1231</url>
</location>
<part>
<date>2019-jun</date>
<extent unit="page">
<start>2223</start>
<end>2234</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Practical, Efficient, and Customizable Active Learning for Named Entity Recognition in the Digital Humanities
%A Erdmann, Alexander
%A Wrisley, David Joseph
%A Allen, Benjamin
%A Brown, Christopher
%A Cohen-Bodénès, Sophie
%A Elsner, Micha
%A Feng, Yukun
%A Joseph, Brian
%A Joyeux-Prunel, Béatrice
%A de Marneffe, Marie-Catherine
%S Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)
%D 2019
%8 jun
%I Association for Computational Linguistics
%C Minneapolis, Minnesota
%F erdmann-etal-2019-practical
%X Scholars in inter-disciplinary fields like the Digital Humanities are increasingly interested in semantic annotation of specialized corpora. Yet, under-resourced languages, imperfect or noisily structured data, and user-specific classification tasks make it difficult to meet their needs using off-the-shelf models. Manual annotation of large corpora from scratch, meanwhile, can be prohibitively expensive. Thus, we propose an active learning solution for named entity recognition, attempting to maximize a custom model’s improvement per additional unit of manual annotation. Our system robustly handles any domain or user-defined label set and requires no external resources, enabling quality named entity recognition for Humanities corpora where such resources are not available. Evaluating on typologically disparate languages and datasets, we reduce required annotation by 20-60% and greatly outperform a competitive active learning baseline.
%R 10.18653/v1/N19-1231
%U https://aclanthology.org/N19-1231
%U https://doi.org/10.18653/v1/N19-1231
%P 2223-2234
Markdown (Informal)
[Practical, Efficient, and Customizable Active Learning for Named Entity Recognition in the Digital Humanities](https://aclanthology.org/N19-1231) (Erdmann et al., NAACL 2019)
ACL
- Alexander Erdmann, David Joseph Wrisley, Benjamin Allen, Christopher Brown, Sophie Cohen-Bodénès, Micha Elsner, Yukun Feng, Brian Joseph, Béatrice Joyeux-Prunel, and Marie-Catherine de Marneffe. 2019. Practical, Efficient, and Customizable Active Learning for Named Entity Recognition in the Digital Humanities. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), pages 2223–2234, Minneapolis, Minnesota. Association for Computational Linguistics.