@comment{Findings of ACL 2025 paper by Payne and Kodner. The url field holds
the canonical aclanthology.org landing page for Anthology ID
2025.findings-acl.1296 rather than a branch-specific preview build link.
The address field carries the conference venue, as exported by the Anthology.}
@inproceedings{payne-kodner-2025-lemmas,
  title     = {Lemmas Matter, But Not Like That: Predictors of Lemma-Based Generalization in Morphological Inflection},
  author    = {Payne, Sarah Ruth Brogden and
               Kodner, Jordan},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Findings of the Association for Computational Linguistics: {ACL} 2025},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-acl.1296/},
  pages     = {25270--25286},
  isbn      = {979-8-89176-256-5},
  abstract  = {Recent work has suggested that overlap {--} whether a given lemma or feature set is attested independently in train {--} drives model performance on morphological inflection tasks. The impact of lemma overlap, however, is debated, with recent work reporting accuracy drops from 0{\%} to 30{\%} between seen and unseen test lemmas. In this paper, we introduce a novel splitting algorithm designed to investigate predictors of accuracy on seen and unseen lemmas. We find only an 11{\%} average drop from seen to unseen test lemmas, but show that the number of lemmas in train has a much stronger effect on accuracy on unseen than seen lemmas. We also show that the previously reported 30{\%} drop is inflated due to the introduction of a near-30{\%} drop in the number of training lemmas from the original splits to their novel splits.},
}
Markdown (Informal)
[Lemmas Matter, But Not Like That: Predictors of Lemma-Based Generalization in Morphological Inflection](https://aclanthology.org/2025.findings-acl.1296/) (Payne & Kodner, Findings 2025)
ACL