% Zhu, Klabjan, and Bless (IJCNLP 2017, Volume 1: Long Papers), ACL Anthology ID I17-1095.
% The url field holds the canonical ACL Anthology address, not a temporary preview-branch build.
@inproceedings{zhu-etal-2017-semantic,
  title     = {Semantic Document Distance Measures and Unsupervised Document Revision Detection},
  author    = {Zhu, Xiaofeng and
               Klabjan, Diego and
               Bless, Patrick},
  editor    = {Kondrak, Greg and
               Watanabe, Taro},
  booktitle = {Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
  month     = nov,
  year      = {2017},
  address   = {Taipei, Taiwan},
  publisher = {Asian Federation of Natural Language Processing},
  url       = {https://aclanthology.org/I17-1095/},
  pages     = {947--956},
  abstract  = {In this paper, we model the document revision detection problem as a minimum cost branching problem that relies on computing document distances. Furthermore, we propose two new document distance measures, word vector-based Dynamic Time Warping (wDTW) and word vector-based Tree Edit Distance (wTED). Our revision detection system is designed for a large scale corpus and implemented in Apache Spark. We demonstrate that our system can more precisely detect revisions than state-of-the-art methods by utilizing the Wikipedia revision dumps and simulated data sets.},
}
Markdown (Informal)
[Semantic Document Distance Measures and Unsupervised Document Revision Detection](https://aclanthology.org/I17-1095/) (Zhu et al., IJCNLP 2017)
ACL