@inproceedings{eiselen-2016-south,
title = "{S}outh {A}frican Language Resources: Phrase Chunking",
author = "Eiselen, Roald",
booktitle = "Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)",
month = may,
year = "2016",
address = "Portoro{\v{z}}, Slovenia",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/L16-1109",
pages = "689--693",
abstract = "Phrase chunking remains an important natural language processing (NLP) technique for intermediate syntactic processing. This paper describes the development of protocols, annotated phrase chunking data sets and automatic phrase chunkers for ten South African languages. Various problems with adapting the existing annotation protocols of English are discussed as well as an overview of the annotated data sets. Based on the annotated sets, CRF-based phrase chunkers are created and tested with a combination of different features, including part of speech tags and character n-grams. The results of the phrase chunking evaluation show that disjunctively written languages can achieve notably better results for phrase chunking with a limited data set than conjunctive languages, but that the addition of character n-grams improves the results for conjunctive languages.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="eiselen-2016-south">
<titleInfo>
<title>South African Language Resources: Phrase Chunking</title>
</titleInfo>
<name type="personal">
<namePart type="given">Roald</namePart>
<namePart type="family">Eiselen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Portorož, Slovenia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Phrase chunking remains an important natural language processing (NLP) technique for intermediate syntactic processing. This paper describes the development of protocols, annotated phrase chunking data sets and automatic phrase chunkers for ten South African languages. Various problems with adapting the existing annotation protocols of English are discussed as well as an overview of the annotated data sets. Based on the annotated sets, CRF-based phrase chunkers are created and tested with a combination of different features, including part of speech tags and character n-grams. The results of the phrase chunking evaluation show that disjunctively written languages can achieve notably better results for phrase chunking with a limited data set than conjunctive languages, but that the addition of character n-grams improves the results for conjunctive languages.</abstract>
<identifier type="citekey">eiselen-2016-south</identifier>
<location>
<url>https://aclanthology.org/L16-1109</url>
</location>
<part>
<date>2016-may</date>
<extent unit="page">
<start>689</start>
<end>693</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T South African Language Resources: Phrase Chunking
%A Eiselen, Roald
%S Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)
%D 2016
%8 may
%I European Language Resources Association (ELRA)
%C Portorož, Slovenia
%F eiselen-2016-south
%X Phrase chunking remains an important natural language processing (NLP) technique for intermediate syntactic processing. This paper describes the development of protocols, annotated phrase chunking data sets and automatic phrase chunkers for ten South African languages. Various problems with adapting the existing annotation protocols of English are discussed as well as an overview of the annotated data sets. Based on the annotated sets, CRF-based phrase chunkers are created and tested with a combination of different features, including part of speech tags and character n-grams. The results of the phrase chunking evaluation show that disjunctively written languages can achieve notably better results for phrase chunking with a limited data set than conjunctive languages, but that the addition of character n-grams improves the results for conjunctive languages.
%U https://aclanthology.org/L16-1109
%P 689-693
Markdown (Informal)
[South African Language Resources: Phrase Chunking](https://aclanthology.org/L16-1109) (Eiselen, LREC 2016)
ACL
- Roald Eiselen. 2016. South African Language Resources: Phrase Chunking. In Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC'16), pages 689–693, Portorož, Slovenia. European Language Resources Association (ELRA).