% EMNLP 2025 main-conference paper (ACL Anthology ID 2025.emnlp-main.1310).
% The url field uses the canonical Anthology address, not a branch-preview deployment.
@inproceedings{ng-etal-2025-less,
  title     = {Less is More: The Effectiveness of Compact Typological Language Representations},
  author    = {Ng, York Hay and
               Hoang, Phuong Hanh and
               Lee, En-Shiun Annie},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.emnlp-main.1310/},
  pages     = {25816--25827},
  isbn      = {979-8-89176-332-6},
  abstract  = {Linguistic feature datasets such as URIEL+ are valuable for modelling cross-lingual relationships, but their high dimensionality and sparsity, especially for low-resource languages, limit the effectiveness of distance metrics. We propose a pipeline to optimize the URIEL+ typological feature space by combining feature selection and imputation, producing compact yet interpretable typological representations. We evaluate these feature subsets on linguistic distance alignment and downstream tasks, demonstrating that reduced-size representations of language typology can yield more informative distance metrics and improve performance in multilingual NLP applications.},
}
Markdown (Informal)
[Less is More: The Effectiveness of Compact Typological Language Representations](https://aclanthology.org/2025.emnlp-main.1310/) (Ng et al., EMNLP 2025)
ACL