@comment{
  Conference paper from the UNLP 2026 proceedings (ACL Anthology export).
  NOTE(review): the url field points at a preview.aclanthology.org staging
  branch (corrections-2026-06); confirm whether it should be replaced by the
  canonical Anthology URL once the volume is published.
}
@inproceedings{fesenko-etal-2026-mining,
  title     = {Mining Native {Ukrainian} Paraphrases: A Multi-Source Comparison},
  author    = {Fesenko, Vladyslav and
               Dydyk-Meush, Hanna and
               Mudryi, Volodymyr},
  editor    = {Romanyshyn, Mariana},
  booktitle = {Proceedings of the Fifth {Ukrainian} Natural Language Processing Conference ({UNLP} 2026)},
  month     = may,
  year      = {2026},
  address   = {Lviv, Ukraine},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/corrections-2026-06/2026.unlp-1.17/},
  pages     = {199--208},
  isbn      = {979-8-89176-359-3},
  abstract  = {We introduce a Ukrainian paraphrase dataset mined from event-aligned news headlines and compare it with translated and LLM-generated data sources. Candidate pairs are retrieved from native Ukrainian news titles and filtered using semantic and lexical constraints to form a training corpus in a semi-automatic pipeline. Human evaluation indicates that the sources differ in useful ways: LLM-generated paraphrases are generally stronger in meaning preservation, whereas news-mined pairs offer greater lexical variation while remaining fluent and meaning-preserving. We tune mT5-large and mT0-large and evaluate them on several held-out test sets, including a human-validated subset. Relative to Spivavtor-large, the models achieve comparable semantic preservation with lower copying on the combined and human-validated sets. Overall, the findings highlight the value of naturally mined Ukrainian paraphrases as supervision for low-resource paraphrase generation.},
}
Markdown (Informal)
[Mining Native Ukrainian Paraphrases: A Multi-Source Comparison](https://preview.aclanthology.org/corrections-2026-06/2026.unlp-1.17/) (Fesenko et al., UNLP 2026)
ACL