@comment{RANLP 2017 conference paper, ACL Anthology ID R17-1098. The url field holds the permanent Anthology address rather than a temporary preview-branch link; the doi field is the bare DOI without a resolver prefix.}
@inproceedings{tsekouras-etal-2017-graph,
  title     = {A Graph-based Text Similarity Measure That Employs Named Entity Information},
  author    = {Tsekouras, Leonidas and
               Varlamis, Iraklis and
               Giannakopoulos, George},
  editor    = {Mitkov, Ruslan and
               Angelova, Galia},
  booktitle = {Proceedings of the International Conference Recent Advances in Natural Language Processing, {RANLP} 2017},
  month     = sep,
  year      = {2017},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd.},
  url       = {https://aclanthology.org/R17-1098/},
  doi       = {10.26615/978-954-452-049-6_098},
  pages     = {765--771},
  abstract  = {Text comparison is an interesting though hard task, with many applications in Natural Language Processing. This work introduces a new text-similarity measure, which employs named-entities' information extracted from the texts and the n-gram graphs' model for representing documents. Using OpenCalais as a named-entity recognition service and the JINSECT toolkit for constructing and managing n-gram graphs, the text similarity measure is embedded in a text clustering algorithm (k-Means). The evaluation of the produced clusters with various clustering validity metrics shows that the extraction of named entities at a first step can be profitable for the time-performance of similarity measures that are based on the n-gram graph representation without affecting the overall performance of the NLP task.},
}
Markdown (Informal)
[A Graph-based Text Similarity Measure That Employs Named Entity Information](https://aclanthology.org/R17-1098/) (Tsekouras et al., RANLP 2017)
ACL