@inproceedings{brooke-hirst-2012-measuring,
title = "Measuring Interlanguage: Native Language Identification with {L}1-influence Metrics",
author = "Brooke, Julian and
Hirst, Graeme",
booktitle = "Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}'12)",
month = may,
year = "2012",
address = "Istanbul, Turkey",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2012/pdf/129_Paper.pdf",
pages = "779--784",
abstract = "The task of native language (L1) identification suffers from a relative paucity of useful training corpora, and standard within-corpus evaluation is often problematic due to topic bias. In this paper, we introduce a method for L1 identification in second language (L2) texts that relies only on much more plentiful L1 data, rather than the L2 texts that are traditionally used for training. In particular, we do word-by-word translation of large L1 blog corpora to create a mapping to L2 forms that are a possible result of language transfer, and then use that information for unsupervised classification. We show this method is effective in several different learner corpora, with bigram features being particularly useful.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="brooke-hirst-2012-measuring">
<titleInfo>
<title>Measuring Interlanguage: Native Language Identification with L1-influence Metrics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Julian</namePart>
<namePart type="family">Brooke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Graeme</namePart>
<namePart type="family">Hirst</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2012-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Istanbul, Turkey</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The task of native language (L1) identification suffers from a relative paucity of useful training corpora, and standard within-corpus evaluation is often problematic due to topic bias. In this paper, we introduce a method for L1 identification in second language (L2) texts that relies only on much more plentiful L1 data, rather than the L2 texts that are traditionally used for training. In particular, we do word-by-word translation of large L1 blog corpora to create a mapping to L2 forms that are a possible result of language transfer, and then use that information for unsupervised classification. We show this method is effective in several different learner corpora, with bigram features being particularly useful.</abstract>
<identifier type="citekey">brooke-hirst-2012-measuring</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/129_Paper.pdf</url>
</location>
<part>
<date>2012-may</date>
<extent unit="page">
<start>779</start>
<end>784</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Measuring Interlanguage: Native Language Identification with L1-influence Metrics
%A Brooke, Julian
%A Hirst, Graeme
%S Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)
%D 2012
%8 may
%I European Language Resources Association (ELRA)
%C Istanbul, Turkey
%F brooke-hirst-2012-measuring
%X The task of native language (L1) identification suffers from a relative paucity of useful training corpora, and standard within-corpus evaluation is often problematic due to topic bias. In this paper, we introduce a method for L1 identification in second language (L2) texts that relies only on much more plentiful L1 data, rather than the L2 texts that are traditionally used for training. In particular, we do word-by-word translation of large L1 blog corpora to create a mapping to L2 forms that are a possible result of language transfer, and then use that information for unsupervised classification. We show this method is effective in several different learner corpora, with bigram features being particularly useful.
%U http://www.lrec-conf.org/proceedings/lrec2012/pdf/129_Paper.pdf
%P 779-784
Markdown (Informal)
[Measuring Interlanguage: Native Language Identification with L1-influence Metrics](http://www.lrec-conf.org/proceedings/lrec2012/pdf/129_Paper.pdf) (Brooke & Hirst, LREC 2012)
ACL