@comment{
  Workshop paper in the MRL 2025 proceedings; the record appears to be an
  ACL Anthology export. The address field holds the conference location,
  as emitted by the exporter.
  NOTE(review): the url field points at a preview/ingest host; confirm the
  canonical link once the volume is published.
}
@inproceedings{poelman-etal-2025-relate,
  title     = {How Can We Relate Language Modeling to Morphology?},
  author    = {Poelman, Wessel and
               Bauwens, Thomas and
               de Lhoneux, Miryam},
  editor    = {Adelani, David Ifeoluwa and
               Arnett, Catherine and
               Ataman, Duygu and
               Chang, Tyler A. and
               Gonen, Hila and
               Raja, Rahul and
               Schmidt, Fabian and
               Stap, David and
               Wang, Jiayi},
  booktitle = {Proceedings of the 5th Workshop on Multilingual Representation Learning ({MRL} 2025)},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-emnlp/2025.mrl-main.14/},
  pages     = {196--198},
  isbn      = {979-8-89176-345-6},
  abstract  = {The extent to which individual language characteristics influence tokenization and language modeling is an open question. Differences in morphological systems have been suggested as both unimportant and crucial to consider (e.g., Cotterell et al., 2018; Park et al., 2021, Arnett {\&} Bergen, 2025). We argue this conflicting evidence is due to confounding factors in experimental setups, making it hard to compare results and draw conclusions. We identify confounding factors in analyses trying to answer the question of whether, and how, morphology relates to language modeling. Next, we introduce token bigram metrics as an intrinsic way to predict the difficulty of causal language modeling, and find that they are gradient proxies for morphological complexity that do not require expert annotation. Ultimately, we outline necessities to reliably answer whether, and how, morphology relates to language modeling.},
}
Markdown (Informal)
[How Can We Relate Language Modeling to Morphology?](https://preview.aclanthology.org/ingest-emnlp/2025.mrl-main.14/) (Poelman et al., MRL 2025)
ACL
- Wessel Poelman, Thomas Bauwens, and Miryam de Lhoneux. 2025. How Can We Relate Language Modeling to Morphology? In Proceedings of the 5th Workshop on Multilingual Representation Learning (MRL 2025), pages 196–198, Suzhou, China. Association for Computational Linguistics.