@inproceedings{shardlow-2014-open,
title = "Out in the Open: Finding and Categorising Errors in the Lexical Simplification Pipeline",
author = "Shardlow, Matthew",
booktitle = "Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)",
month = may,
year = "2014",
address = "Reykjavik, Iceland",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2014/pdf/479_Paper.pdf",
pages = "1583--1590",
abstract = "Lexical simplification is the task of automatically reducing the complexity of a text by identifying difficult words and replacing them with simpler alternatives. Whilst this is a valuable application of natural language generation, rudimentary lexical simplification systems suffer from a high error rate which often results in nonsensical, non-simple text. This paper seeks to characterise and quantify the errors which occur in a typical baseline lexical simplification system. We expose 6 distinct categories of error and propose a classification scheme for these. We also quantify these errors for a moderate size corpus, showing the magnitude of each error type. We find that for 183 identified simplification instances, only 19 (10.38{\%}) result in a valid simplification, with the rest causing errors of varying gravity.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="shardlow-2014-open">
<titleInfo>
<title>Out in the Open: Finding and Categorising Errors in the Lexical Simplification Pipeline</title>
</titleInfo>
<name type="personal">
<namePart type="given">Matthew</namePart>
<namePart type="family">Shardlow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2014-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Reykjavik, Iceland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Lexical simplification is the task of automatically reducing the complexity of a text by identifying difficult words and replacing them with simpler alternatives. Whilst this is a valuable application of natural language generation, rudimentary lexical simplification systems suffer from a high error rate which often results in nonsensical, non-simple text. This paper seeks to characterise and quantify the errors which occur in a typical baseline lexical simplification system. We expose 6 distinct categories of error and propose a classification scheme for these. We also quantify these errors for a moderate size corpus, showing the magnitude of each error type. We find that for 183 identified simplification instances, only 19 (10.38%) result in a valid simplification, with the rest causing errors of varying gravity.</abstract>
<identifier type="citekey">shardlow-2014-open</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/479_Paper.pdf</url>
</location>
<part>
<date>2014-may</date>
<extent unit="page">
<start>1583</start>
<end>1590</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Out in the Open: Finding and Categorising Errors in the Lexical Simplification Pipeline
%A Shardlow, Matthew
%S Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)
%D 2014
%8 may
%I European Language Resources Association (ELRA)
%C Reykjavik, Iceland
%F shardlow-2014-open
%X Lexical simplification is the task of automatically reducing the complexity of a text by identifying difficult words and replacing them with simpler alternatives. Whilst this is a valuable application of natural language generation, rudimentary lexical simplification systems suffer from a high error rate which often results in nonsensical, non-simple text. This paper seeks to characterise and quantify the errors which occur in a typical baseline lexical simplification system. We expose 6 distinct categories of error and propose a classification scheme for these. We also quantify these errors for a moderate size corpus, showing the magnitude of each error type. We find that for 183 identified simplification instances, only 19 (10.38%) result in a valid simplification, with the rest causing errors of varying gravity.
%U http://www.lrec-conf.org/proceedings/lrec2014/pdf/479_Paper.pdf
%P 1583-1590
Markdown (Informal)
[Out in the Open: Finding and Categorising Errors in the Lexical Simplification Pipeline](http://www.lrec-conf.org/proceedings/lrec2014/pdf/479_Paper.pdf) (Shardlow, LREC 2014)
ACL