@inproceedings{barbu-etal-2025-improving,
title = "Improving {E}stonian Text Simplification through Pretrained Language Models and Custom Datasets",
author = "Barbu, Eduard and
Muru, Meeri-Ly and
Malva, Sten Marcus",
editor = "Angelova, Galia and
Kunilovskaya, Maria and
Escribe, Marie and
Mitkov, Ruslan",
booktitle = "Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era",
month = sep,
year = "2025",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://preview.aclanthology.org/corrections-2026-01/2025.ranlp-1.16/",
pages = "133--142",
abstract = "This paper presents a method for text simplification based on two neural architectures: a neural machine translation (NMT) model and a fine-tuned large language model (LLaMA). Given the scarcity of existing resources for Estonian, a new dataset was created by combining manually translated corpora with GPT-4.0-generated simplifications. OpenNMT was selected as a representative NMT-based system, while LLaMA was fine-tuned on the constructed dataset. Evaluation shows LLaMA outperforms OpenNMT in grammaticality, readability, and meaning preservation. These results underscore the effectiveness of large language models for text simplification in low-resource language settings. The complete dataset, fine-tuning scripts, and evaluation pipeline are provided in a publicly accessible supplementary package to support reproducibility and adaptation to other languages."
}