@inproceedings{batanovic-milicevic-petrovic-2022-cross,
title = "Cross-Level Semantic Similarity for {S}erbian Newswire Texts",
author = "Batanovi{\'c}, Vuk and
Mili{\v{c}}evi{\'c} Petrovi{\'c}, Maja",
editor = "Calzolari, Nicoletta and
B{\'e}chet, Fr{\'e}d{\'e}ric and
Blache, Philippe and
Choukri, Khalid and
Cieri, Christopher and
Declerck, Thierry and
Goggi, Sara and
Isahara, Hitoshi and
Maegaard, Bente and
Mariani, Joseph and
Mazo, H{\'e}l{\`e}ne and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Thirteenth Language Resources and Evaluation Conference",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://preview.aclanthology.org/add-emnlp-2024-awards/2022.lrec-1.180/",
pages = "1691--1699",
abstract = "Cross-Level Semantic Similarity (CLSS) is a measure of the level of semantic overlap between texts of different lengths. Although this problem was formulated almost a decade ago, research on it has been sparse, and limited exclusively to the English language. In this paper, we present the first CLSS dataset in another language, in the form of CLSS.news.sr {--} a corpus of 1000 phrase-sentence and 1000 sentence-paragraph newswire text pairs in Serbian, manually annotated with fine-grained semantic similarity scores using a 0{--}4 similarity scale. We describe the methodology of data collection and annotation, and compare the resulting corpus to its preexisting counterpart in English, SemEval CLSS, following up with a preliminary linguistic analysis of the newly created dataset. State-of-the-art pre-trained language models are then fine-tuned and evaluated on the CLSS task in Serbian using the produced data, and their settings and results are discussed. The CLSS.news.sr corpus and the guidelines used in its creation are made publicly available."
}
Markdown (Informal)
[Cross-Level Semantic Similarity for Serbian Newswire Texts](https://preview.aclanthology.org/add-emnlp-2024-awards/2022.lrec-1.180/) (Batanović & Miličević Petrović, LREC 2022)
ACL