@comment{Workshop paper in the MRL 2025 proceedings (Ralethe and Buys). Any text that follows the entry's closing brace is citation-export residue, not BibTeX; parsers ignore it.}
@inproceedings{ralethe-buys-2025-cross-lingual,
  title     = "Cross-Lingual Knowledge Augmentation for Mitigating Generic Overgeneralization in Multilingual Language Models",
  author    = "Ralethe, Sello and
               Buys, Jan",
  editor    = "Adelani, David Ifeoluwa and
               Arnett, Catherine and
               Ataman, Duygu and
               Chang, Tyler A. and
               Gonen, Hila and
               Raja, Rahul and
               Schmidt, Fabian and
               Stap, David and
               Wang, Jiayi",
  booktitle = "Proceedings of the 5th Workshop on Multilingual Representation Learning (MRL 2025)",
  month     = nov,
  year      = "2025",
  address   = "Suzhou, China",
  publisher = "Association for Computational Linguistics",
  url       = "https://preview.aclanthology.org/ingest-emnlp/2025.mrl-main.32/",
  pages     = "483--495",
  isbn      = "979-8-89176-345-6",
  abstract  = "Generic statements like ``birds fly'' or ``lions have manes'' express generalizations about kinds that allow exceptions, yet language models tend to overgeneralize them to universal claims. While previous work showed that ASCENT KB could reduce this effect in English by 30-40{\%}, the effectiveness of broader knowledge sources and the cross-lingual nature of this phenomenon remain unexplored. We investigate generic overgeneralization across English and four South African languages (isiZulu, isiXhosa, Sepedi, SeSotho), comparing the impact of ConceptNet and DBpedia against the previously used ASCENT KB. Our experiments show that ConceptNet reduces overgeneralization by 45-52{\%} for minority characteristic generics, while DBpedia achieves 48-58{\%} for majority characteristics, with combined knowledge bases reaching 67{\%} reduction. These improvements are consistent across all languages, though Nguni languages show higher baseline overgeneralization than Sotho-Tswana languages, potentially suggesting that morphological features may influence this semantic bias. Our findings demonstrate that commonsense and encyclopedic knowledge provide complementary benefits for multilingual semantic understanding, offering insights for developing NLP systems that capture nuanced semantics in low-resource languages."
}
Markdown (Informal)
[Cross-Lingual Knowledge Augmentation for Mitigating Generic Overgeneralization in Multilingual Language Models](https://preview.aclanthology.org/ingest-emnlp/2025.mrl-main.32/) (Ralethe & Buys, MRL 2025)
ACL