@inproceedings{plank-simaan-2008-subdomain,
title = "Subdomain Sensitive Statistical Parsing using Raw Corpora",
author = "Plank, Barbara and
Sima{'}an, Khalil",
booktitle = "Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)",
month = may,
year = "2008",
address = "Marrakech, Morocco",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2008/pdf/120_paper.pdf",
abstract = "Modern statistical parsers are trained on large annotated corpora (treebanks). These treebanks usually consist of sentences addressing different subdomains (e.g. sports, politics, music), which implies that the statistics gathered by current statistical parsers are mixtures of subdomains of language use. In this paper we present a method that exploits raw subdomain corpora gathered from the web to introduce subdomain sensitivity into a given parser. We employ statistical techniques for creating an ensemble of domain sensitive parsers, and explore methods for amalgamating their predictions. Our experiments show that introducing domain sensitivity by exploiting raw corpora can improve over a tough, state-of-the-art baseline.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="plank-simaan-2008-subdomain">
<titleInfo>
<title>Subdomain Sensitive Statistical Parsing using Raw Corpora</title>
</titleInfo>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="family">Plank</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalil</namePart>
<namePart type="family">Sima’an</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2008-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Marrakech, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Modern statistical parsers are trained on large annotated corpora (treebanks). These treebanks usually consist of sentences addressing different subdomains (e.g. sports, politics, music), which implies that the statistics gathered by current statistical parsers are mixtures of subdomains of language use. In this paper we present a method that exploits raw subdomain corpora gathered from the web to introduce subdomain sensitivity into a given parser. We employ statistical techniques for creating an ensemble of domain sensitive parsers, and explore methods for amalgamating their predictions. Our experiments show that introducing domain sensitivity by exploiting raw corpora can improve over a tough, state-of-the-art baseline.</abstract>
<identifier type="citekey">plank-simaan-2008-subdomain</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/120_paper.pdf</url>
</location>
<part>
<date>2008-may</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Subdomain Sensitive Statistical Parsing using Raw Corpora
%A Plank, Barbara
%A Sima’an, Khalil
%S Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)
%D 2008
%8 may
%I European Language Resources Association (ELRA)
%C Marrakech, Morocco
%F plank-simaan-2008-subdomain
%X Modern statistical parsers are trained on large annotated corpora (treebanks). These treebanks usually consist of sentences addressing different subdomains (e.g. sports, politics, music), which implies that the statistics gathered by current statistical parsers are mixtures of subdomains of language use. In this paper we present a method that exploits raw subdomain corpora gathered from the web to introduce subdomain sensitivity into a given parser. We employ statistical techniques for creating an ensemble of domain sensitive parsers, and explore methods for amalgamating their predictions. Our experiments show that introducing domain sensitivity by exploiting raw corpora can improve over a tough, state-of-the-art baseline.
%U http://www.lrec-conf.org/proceedings/lrec2008/pdf/120_paper.pdf
Markdown (Informal)
[Subdomain Sensitive Statistical Parsing using Raw Corpora](http://www.lrec-conf.org/proceedings/lrec2008/pdf/120_paper.pdf) (Plank & Sima’an, LREC 2008)
ACL