@inproceedings{koyama-etal-2020-construction,
title = "Construction of an Evaluation Corpus for Grammatical Error Correction for Learners of {J}apanese as a Second Language",
author = "Koyama, Aomi and
Kiyuna, Tomoshige and
Kobayashi, Kenji and
Arai, Mio and
Komachi, Mamoru",
booktitle = "Proceedings of the 12th Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.26",
pages = "204--211",
abstract = "The NAIST Lang-8 Learner Corpora (Lang-8 corpus) is one of the largest second-language learner corpora. The Lang-8 corpus is suitable as a training dataset for machine translation-based grammatical error correction systems. However, it is not suitable as an evaluation dataset because the corrected sentences sometimes include inappropriate sentences. Therefore, we created and released an evaluation corpus for correcting grammatical errors made by learners of Japanese as a Second Language (JSL). As our corpus has less noise and its annotation scheme reflects the characteristics of the dataset, it is ideal as an evaluation corpus for correcting grammatical errors in sentences written by JSL learners. In addition, we applied neural machine translation (NMT) and statistical machine translation (SMT) techniques to correct the grammar of the JSL learners{'} sentences and evaluated their results using our corpus. We also compared the performance of the NMT system with that of the SMT system.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="koyama-etal-2020-construction">
<titleInfo>
<title>Construction of an Evaluation Corpus for Grammatical Error Correction for Learners of Japanese as a Second Language</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aomi</namePart>
<namePart type="family">Koyama</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tomoshige</namePart>
<namePart type="family">Kiyuna</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kenji</namePart>
<namePart type="family">Kobayashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mio</namePart>
<namePart type="family">Arai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mamoru</namePart>
<namePart type="family">Komachi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th Language Resources and Evaluation Conference</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>The NAIST Lang-8 Learner Corpora (Lang-8 corpus) is one of the largest second-language learner corpora. The Lang-8 corpus is suitable as a training dataset for machine translation-based grammatical error correction systems. However, it is not suitable as an evaluation dataset because the corrected sentences sometimes include inappropriate sentences. Therefore, we created and released an evaluation corpus for correcting grammatical errors made by learners of Japanese as a Second Language (JSL). As our corpus has less noise and its annotation scheme reflects the characteristics of the dataset, it is ideal as an evaluation corpus for correcting grammatical errors in sentences written by JSL learners. In addition, we applied neural machine translation (NMT) and statistical machine translation (SMT) techniques to correct the grammar of the JSL learners’ sentences and evaluated their results using our corpus. We also compared the performance of the NMT system with that of the SMT system.</abstract>
<identifier type="citekey">koyama-etal-2020-construction</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.26</url>
</location>
<part>
<date>2020-may</date>
<extent unit="page">
<start>204</start>
<end>211</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Construction of an Evaluation Corpus for Grammatical Error Correction for Learners of Japanese as a Second Language
%A Koyama, Aomi
%A Kiyuna, Tomoshige
%A Kobayashi, Kenji
%A Arai, Mio
%A Komachi, Mamoru
%S Proceedings of the 12th Language Resources and Evaluation Conference
%D 2020
%8 may
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F koyama-etal-2020-construction
%X The NAIST Lang-8 Learner Corpora (Lang-8 corpus) is one of the largest second-language learner corpora. The Lang-8 corpus is suitable as a training dataset for machine translation-based grammatical error correction systems. However, it is not suitable as an evaluation dataset because the corrected sentences sometimes include inappropriate sentences. Therefore, we created and released an evaluation corpus for correcting grammatical errors made by learners of Japanese as a Second Language (JSL). As our corpus has less noise and its annotation scheme reflects the characteristics of the dataset, it is ideal as an evaluation corpus for correcting grammatical errors in sentences written by JSL learners. In addition, we applied neural machine translation (NMT) and statistical machine translation (SMT) techniques to correct the grammar of the JSL learners’ sentences and evaluated their results using our corpus. We also compared the performance of the NMT system with that of the SMT system.
%U https://aclanthology.org/2020.lrec-1.26
%P 204-211
Markdown (Informal)
[Construction of an Evaluation Corpus for Grammatical Error Correction for Learners of Japanese as a Second Language](https://aclanthology.org/2020.lrec-1.26) (Koyama et al., LREC 2020)
ACL