@inproceedings{nather-2020-depth,
title = "An In-Depth Comparison of 14 Spelling Correction Tools on a Common Benchmark",
author = {N{\"a}ther, Markus},
booktitle = "Proceedings of the 12th Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.228",
pages = "1849--1857",
abstract = "Determining and correcting spelling and grammar errors in text is an important but surprisingly difficult task. There are several reasons why this remains challenging. Errors may consist of simple typing errors like deleted, substituted, or wrongly inserted letters, but may also consist of word confusions where a word was replaced by another one. In addition, words may be erroneously split into two parts or get concatenated. Some words can contain hyphens, because they were split at the end of a line or are compound words with a mandatory hyphen. In this paper, we provide an extensive evaluation of 14 spelling correction tools on a common benchmark. In particular, the evaluation provides a detailed comparison with respect to 12 error categories. The benchmark consists of sentences from the English Wikipedia, which were distorted using a realistic error model. Measuring the quality of an algorithm with respect to these error categories requires an alignment of the original text, the distorted text and the corrected text provided by the tool. We make our benchmark generation and evaluation tools publicly available.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nather-2020-depth">
<titleInfo>
<title>An In-Depth Comparison of 14 Spelling Correction Tools on a Common Benchmark</title>
</titleInfo>
<name type="personal">
<namePart type="given">Markus</namePart>
<namePart type="family">Näther</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th Language Resources and Evaluation Conference</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>Determining and correcting spelling and grammar errors in text is an important but surprisingly difficult task. There are several reasons why this remains challenging. Errors may consist of simple typing errors like deleted, substituted, or wrongly inserted letters, but may also consist of word confusions where a word was replaced by another one. In addition, words may be erroneously split into two parts or get concatenated. Some words can contain hyphens, because they were split at the end of a line or are compound words with a mandatory hyphen. In this paper, we provide an extensive evaluation of 14 spelling correction tools on a common benchmark. In particular, the evaluation provides a detailed comparison with respect to 12 error categories. The benchmark consists of sentences from the English Wikipedia, which were distorted using a realistic error model. Measuring the quality of an algorithm with respect to these error categories requires an alignment of the original text, the distorted text and the corrected text provided by the tool. We make our benchmark generation and evaluation tools publicly available.</abstract>
<identifier type="citekey">nather-2020-depth</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.228</url>
</location>
<part>
<date>2020-may</date>
<extent unit="page">
<start>1849</start>
<end>1857</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T An In-Depth Comparison of 14 Spelling Correction Tools on a Common Benchmark
%A Näther, Markus
%S Proceedings of the 12th Language Resources and Evaluation Conference
%D 2020
%8 may
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F nather-2020-depth
%X Determining and correcting spelling and grammar errors in text is an important but surprisingly difficult task. There are several reasons why this remains challenging. Errors may consist of simple typing errors like deleted, substituted, or wrongly inserted letters, but may also consist of word confusions where a word was replaced by another one. In addition, words may be erroneously split into two parts or get concatenated. Some words can contain hyphens, because they were split at the end of a line or are compound words with a mandatory hyphen. In this paper, we provide an extensive evaluation of 14 spelling correction tools on a common benchmark. In particular, the evaluation provides a detailed comparison with respect to 12 error categories. The benchmark consists of sentences from the English Wikipedia, which were distorted using a realistic error model. Measuring the quality of an algorithm with respect to these error categories requires an alignment of the original text, the distorted text and the corrected text provided by the tool. We make our benchmark generation and evaluation tools publicly available.
%U https://aclanthology.org/2020.lrec-1.228
%P 1849-1857
Markdown (Informal)
[An In-Depth Comparison of 14 Spelling Correction Tools on a Common Benchmark](https://aclanthology.org/2020.lrec-1.228) (Näther, LREC 2020)
ACL