@comment{ACL Anthology record for a Findings of EMNLP 2025 paper. The url field uses the canonical Anthology address, not a preview-branch link.}
@inproceedings{dementieva-etal-2025-emobench,
  title     = {{EmoBench-UA}: A Benchmark Dataset for Emotion Detection in {Ukrainian}},
  author    = {Dementieva, Daryna and
               Babakov, Nikolay and
               Fraser, Alexander},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2025},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-emnlp.107/},
  doi       = {10.18653/v1/2025.findings-emnlp.107},
  pages     = {2025--2048},
  isbn      = {979-8-89176-335-7},
  abstract  = {While Ukrainian NLP has seen progress in many texts processing tasks, emotion classification remains an underexplored area with no publicly available benchmark to date. In this work, we introduce EmoBench-UA, the first annotated dataset for emotion detection in Ukrainian texts. Our annotation schema is adapted from the previous English-centric works on emotion detection (Mohammad et al., 2018; Mohammad, 2022) guidelines. The dataset was created through crowdsourcing using the Toloka.ai platform ensuring high-quality of the annotation process. Then, we evaluate a range of approaches on the collected dataset, starting from linguistic-based baselines, synthetic data translated from English, to large language models (LLMs). Our findings highlight the challenges of emotion classification in non-mainstream languages like Ukrainian and emphasize the need for further development of Ukrainian-specific models and training resources.},
}
Markdown (Informal)
[EmoBench-UA: A Benchmark Dataset for Emotion Detection in Ukrainian](https://aclanthology.org/2025.findings-emnlp.107/) (Dementieva et al., Findings 2025)
ACL