@inproceedings{walker-etal-2001-sentence,
title = "Sentence boundary detection: a comparison of paradigms for improving {MT} quality",
author = "Walker, Daniel J. and
Clements, David E. and
Darwin, Maki and
Amtrup, Jan W.",
booktitle = "Proceedings of Machine Translation Summit VIII",
month = sep # " 18-22",
year = "2001",
address = "Santiago de Compostela, Spain",
url = "https://aclanthology.org/2001.mtsummit-papers.66",
abstract = "The reliable detection of sentence boundaries in running text is one of the first important steps in preparing an input document for translation. Although this is often neglected, it is necessary to obtain a translation with a high degree of quality. In this paper, we present a comparison of different paradigms for the detection of sentence boundaries in written text. We compare three different approaches: Directly encoding the knowledge in a program, a rule-based system relying on regular expressions to describe boundaries, and a statistical maximum-entropy learning algorithm to obtain knowledge about boundaries. Using the statistical system, we obtain a recall of 98.14{\%}, classifying boundaries of six types, and using a training corpus of under 10,000 sentences.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="walker-etal-2001-sentence">
<titleInfo>
<title>Sentence boundary detection: a comparison of paradigms for improving MT quality</title>
</titleInfo>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="given">J</namePart>
<namePart type="family">Walker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="given">E</namePart>
<namePart type="family">Clements</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maki</namePart>
<namePart type="family">Darwin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="given">W</namePart>
<namePart type="family">Amtrup</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2001-09-18/2001-09-22</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of Machine Translation Summit VIII</title>
</titleInfo>
<originInfo>
<place>
<placeTerm type="text">Santiago de Compostela, Spain</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The reliable detection of sentence boundaries in running text is one of the first important steps in preparing an input document for translation. Although this is often neglected, it is necessary to obtain a translation with a high degree of quality. In this paper, we present a comparison of different paradigms for the detection of sentence boundaries in written text. We compare three different approaches: Directly encoding the knowledge in a program, a rule-based system relying on regular expressions to describe boundaries, and a statistical maximum-entropy learning algorithm to obtain knowledge about boundaries. Using the statistical system, we obtain a recall of 98.14%, classifying boundaries of six types, and using a training corpus of under 10,000 sentences.</abstract>
<identifier type="citekey">walker-etal-2001-sentence</identifier>
<location>
<url>https://aclanthology.org/2001.mtsummit-papers.66</url>
</location>
<part>
<date>2001-09-18/2001-09-22</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Sentence boundary detection: a comparison of paradigms for improving MT quality
%A Walker, Daniel J.
%A Clements, David E.
%A Darwin, Maki
%A Amtrup, Jan W.
%S Proceedings of Machine Translation Summit VIII
%D 2001
%8 September 18-22
%C Santiago de Compostela, Spain
%F walker-etal-2001-sentence
%X The reliable detection of sentence boundaries in running text is one of the first important steps in preparing an input document for translation. Although this is often neglected, it is necessary to obtain a translation with a high degree of quality. In this paper, we present a comparison of different paradigms for the detection of sentence boundaries in written text. We compare three different approaches: Directly encoding the knowledge in a program, a rule-based system relying on regular expressions to describe boundaries, and a statistical maximum-entropy learning algorithm to obtain knowledge about boundaries. Using the statistical system, we obtain a recall of 98.14%, classifying boundaries of six types, and using a training corpus of under 10,000 sentences.
%U https://aclanthology.org/2001.mtsummit-papers.66
Markdown (Informal)
[Sentence boundary detection: a comparison of paradigms for improving MT quality](https://aclanthology.org/2001.mtsummit-papers.66) (Walker et al., MTSummit 2001)
ACL