@comment{Conference paper record for Yamauchi and Aizawa, Findings of EACL 2026.
  The url field holds the canonical ACL Anthology address for paper ID
  2026.findings-eacl.95 instead of the temporary preview/ingest address.
  NOTE(review): confirm the canonical address resolves once the volume is live.
  The address field appears to hold the conference location, not a publisher city.}
@inproceedings{yamauchi-aizawa-2026-semantics,
  author    = {Yamauchi, Yusuke and
               Aizawa, Akiko},
  title     = {From Semantics to Style: A Cross-Dataset Comparative Framework for Sentence Similarity Predictions},
  editor    = {Demberg, Vera and
               Inui, Kentaro and
               Marquez, Llu{\'\i}s},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {EACL} 2026},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  pages     = {1848--1877},
  isbn      = {979-8-89176-386-9},
  url       = {https://aclanthology.org/2026.findings-eacl.95/},
  abstract  = {While Semantic Textual Similarity (STS) task serves as a cornerstone embedding task in natural language processing, the definition of similarity is inherently ambiguous and dataset-specific. Comprehensive cross-dataset analysis remains scarce, leaving it uncertain whether language models perceive diverse semantic and stylistic nuances as humans do. To address this, we propose a comparative framework utilizing lightweight poolers on a frozen encoder to conduct a unified analysis across STS, Paraphrase Identification (PI), and Triplet datasets. Experimental results on 21 datasets indicate a high correlation of semantic concepts between STS and PI settings, while highlighting style as a distinct dimension necessitating explicit separation from semantics. Moreover, Procrustes, layer-wise and hierarchical clustering analyses elucidate the varying properties of these concepts and the structural organization of the embedding space. These insights imply that treating semantics and style as separate components in embedding models is crucial for enhancing both interpretability and practical utility.},
}
Markdown (Informal)
[From Semantics to Style: A Cross-Dataset Comparative Framework for Sentence Similarity Predictions](https://aclanthology.org/2026.findings-eacl.95/) (Yamauchi & Aizawa, Findings 2026)
ACL