% NAACL 2022 main-conference paper (ACL Anthology ID 2022.naacl-main.331).
% The url field holds the canonical Anthology permalink, not a staging/preview build.
@inproceedings{mysore-etal-2022-multi,
    title     = {Multi-Vector Models with Textual Guidance for Fine-Grained Scientific Document Similarity},
    author    = {Mysore, Sheshera and
                 Cohan, Arman and
                 Hope, Tom},
    editor    = {Carpuat, Marine and
                 de Marneffe, Marie-Catherine and
                 Meza Ruiz, Ivan Vladimir},
    booktitle = {Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
    month     = jul,
    year      = {2022},
    address   = {Seattle, United States},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2022.naacl-main.331/},
    doi       = {10.18653/v1/2022.naacl-main.331},
    pages     = {4453--4470},
    abstract  = {We present a new scientific document similarity model based on matching fine-grained aspects of texts. To train our model, we exploit a naturally-occurring source of supervision: sentences in the full-text of papers that cite multiple papers together (co-citations). Such co-citations not only reflect close paper relatedness, but also provide textual descriptions of how the co-cited papers are related. This novel form of textual supervision is used for learning to match aspects across papers. We develop multi-vector representations where vectors correspond to sentence-level aspects of documents, and present two methods for aspect matching: (1) A fast method that only matches single aspects, and (2) a method that makes sparse multiple matches with an Optimal Transport mechanism that computes an Earth Mover's Distance between aspects. Our approach improves performance on document similarity tasks in four datasets. Further, our fast single-match method achieves competitive results, paving the way for applying fine-grained similarity to large scientific corpora.},
}
Markdown (Informal)
[Multi-Vector Models with Textual Guidance for Fine-Grained Scientific Document Similarity](https://aclanthology.org/2022.naacl-main.331/) (Mysore et al., NAACL 2022)
ACL