% ACL Anthology record W19-2514 (workshop paper).
% The month field uses the predefined three-letter macro, so it is left undelimited on purpose.
@inproceedings{manjavacas-etal-2019-feasibility,
  author    = {Manjavacas, Enrique and
               Long, Brian and
               Kestemont, Mike},
  title     = {On the Feasibility of Automated Detection of Allusive Text Reuse},
  booktitle = {Proceedings of the 3rd Joint {SIGHUM} Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature},
  year      = {2019},
  month     = jun,
  address   = {Minneapolis, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {104--114},
  doi       = {10.18653/v1/W19-2514},
  url       = {https://aclanthology.org/W19-2514},
  abstract  = {The detection of allusive text reuse is particularly challenging due to the sparse evidence on which allusive references rely {---} commonly based on none or very few shared words. Arguably, lexical semantics can be resorted to since uncovering semantic relations between words has the potential to increase the support underlying the allusion and alleviate the lexical sparsity. A further obstacle is the lack of evaluation benchmark corpora, largely due to the highly interpretative character of the annotation process. In the present paper, we aim to elucidate the feasibility of automated allusion detection. We approach the matter from an Information Retrieval perspective in which referencing texts act as queries and referenced texts as relevant documents to be retrieved, and estimate the difficulty of benchmark corpus compilation by a novel inter-annotator agreement study on query segmentation. Furthermore, we investigate to what extent the integration of lexical semantic information derived from distributional models and ontologies can aid retrieving cases of allusive reuse. The results show that (i) despite low agreement scores, using manual queries considerably improves retrieval performance with respect to a windowing approach, and that (ii) retrieval performance can be moderately boosted with distributional semantics.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper with citekey manjavacas-etal-2019-feasibility. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="manjavacas-etal-2019-feasibility">
    <titleInfo>
      <title>On the Feasibility of Automated Detection of Allusive Text Reuse</title>
    </titleInfo>
    <!-- Three personal-name entries, each carrying the "author" role. -->
    <name type="personal">
      <namePart type="given">Enrique</namePart>
      <namePart type="family">Manjavacas</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Brian</namePart>
      <namePart type="family">Long</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mike</namePart>
      <namePart type="family">Kestemont</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <!-- NOTE(review): "2019-jun" is free text, not an ISO 8601 / W3CDTF date; confirm what consumers expect before normalising it. -->
    <originInfo>
      <dateIssued>2019-jun</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume that contains this paper. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 3rd Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature</title>
      </titleInfo>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Minneapolis, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The detection of allusive text reuse is particularly challenging due to the sparse evidence on which allusive references rely — commonly based on none or very few shared words. Arguably, lexical semantics can be resorted to since uncovering semantic relations between words has the potential to increase the support underlying the allusion and alleviate the lexical sparsity. A further obstacle is the lack of evaluation benchmark corpora, largely due to the highly interpretative character of the annotation process. In the present paper, we aim to elucidate the feasibility of automated allusion detection. We approach the matter from an Information Retrieval perspective in which referencing texts act as queries and referenced texts as relevant documents to be retrieved, and estimate the difficulty of benchmark corpus compilation by a novel inter-annotator agreement study on query segmentation. Furthermore, we investigate to what extent the integration of lexical semantic information derived from distributional models and ontologies can aid retrieving cases of allusive reuse. The results show that (i) despite low agreement scores, using manual queries considerably improves retrieval performance with respect to a windowing approach, and that (ii) retrieval performance can be moderately boosted with distributional semantics.</abstract>
    <identifier type="citekey">manjavacas-etal-2019-feasibility</identifier>
    <identifier type="doi">10.18653/v1/W19-2514</identifier>
    <location>
      <url>https://aclanthology.org/W19-2514</url>
    </location>
    <!-- Date and page extent of this paper within the host item. -->
    <part>
      <date>2019-jun</date>
      <extent unit="page">
        <start>104</start>
        <end>114</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T On the Feasibility of Automated Detection of Allusive Text Reuse
%A Manjavacas, Enrique
%A Long, Brian
%A Kestemont, Mike
%S Proceedings of the 3rd Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature
%D 2019
%8 jun
%I Association for Computational Linguistics
%C Minneapolis, USA
%F manjavacas-etal-2019-feasibility
%X The detection of allusive text reuse is particularly challenging due to the sparse evidence on which allusive references rely — commonly based on none or very few shared words. Arguably, lexical semantics can be resorted to since uncovering semantic relations between words has the potential to increase the support underlying the allusion and alleviate the lexical sparsity. A further obstacle is the lack of evaluation benchmark corpora, largely due to the highly interpretative character of the annotation process. In the present paper, we aim to elucidate the feasibility of automated allusion detection. We approach the matter from an Information Retrieval perspective in which referencing texts act as queries and referenced texts as relevant documents to be retrieved, and estimate the difficulty of benchmark corpus compilation by a novel inter-annotator agreement study on query segmentation. Furthermore, we investigate to what extent the integration of lexical semantic information derived from distributional models and ontologies can aid retrieving cases of allusive reuse. The results show that (i) despite low agreement scores, using manual queries considerably improves retrieval performance with respect to a windowing approach, and that (ii) retrieval performance can be moderately boosted with distributional semantics.
%R 10.18653/v1/W19-2514
%U https://aclanthology.org/W19-2514
%U https://doi.org/10.18653/v1/W19-2514
%P 104-114
Markdown (Informal)
[On the Feasibility of Automated Detection of Allusive Text Reuse](https://aclanthology.org/W19-2514) (Manjavacas et al., 2019)
ACL
- Enrique Manjavacas, Brian Long, and Mike Kestemont. 2019. On the Feasibility of Automated Detection of Allusive Text Reuse. In Proceedings of the 3rd Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature, pages 104–114, Minneapolis, USA. Association for Computational Linguistics.