% Zhou et al., Findings of NAACL 2024, pp. 1274--1280 (ACL Anthology ID 2024.findings-naacl.81).
% The url field uses the permanent Anthology address instead of a preview-branch host.
@inproceedings{zhou-etal-2024-matching,
    title     = {Matching Varying-Length Texts via Topic-Informed and Decoupled Sentence Embeddings},
    author    = {Zhou, Xixi and
                 Gu, Chunbin and
                 Jie, Xin and
                 Bu, Jiajun and
                 Wang, Haishuai},
    editor    = {Duh, Kevin and
                 Gomez, Helena and
                 Bethard, Steven},
    booktitle = {Findings of the Association for Computational Linguistics: {NAACL} 2024},
    month     = jun,
    year      = {2024},
    address   = {Mexico City, Mexico},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2024.findings-naacl.81/},
    doi       = {10.18653/v1/2024.findings-naacl.81},
    pages     = {1274--1280},
    abstract  = {Measuring semantic similarity between texts is a crucial task in natural language processing. While existing semantic text matching focuses on pairs of similar-length sequences, matching texts with non-comparable lengths has broader applications in specific domains, such as comparing professional document summaries and content. Current approaches struggle with text pairs of non-comparable lengths due to truncation issues. To address this, we split texts into natural sentences and decouple sentence representations using supervised contrastive learning (SCL). Meanwhile, we adopt the embedded topic model (ETM) for specific domain data. Our experiments demonstrate the effectiveness of our model, based on decoupled and topic-informed sentence embeddings, in matching texts of significantly different lengths across three well-studied datasets.},
}
Markdown (Informal)
[Matching Varying-Length Texts via Topic-Informed and Decoupled Sentence Embeddings](https://aclanthology.org/2024.findings-naacl.81/) (Zhou et al., Findings 2024)
ACL