% Conference paper in the ACL 2026 proceedings (Volume 1: Long Papers).
% The abstract is stored as plain text: Markdown emphasis markers are not
% LaTeX markup and would be typeset as literal asterisks.
@inproceedings{shi-etal-2026-improve,
  title     = {How to Improve {LLM}s' Performance on Specific Languages: A Perspective on {LLM}-Derived Language Similarity},
  author    = {Shi, Xinhe and
               Zeng, Qingcheng and
               Xuan, Weihao and
               Zhu, Linchao},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-long.691/},
  pages     = {15139--15164},
  isbn      = {979-8-89176-390-6},
  abstract  = {Large language models (LLMs) exhibit uneven performance across languages. In language-specific applications, practitioners often rely on target-language corpora or cross-lingual transfer to achieve better performance. However, traditional linguistic typology, commonly used as a transfer language selection strategy in previous studies, may not align with LLM{'}s perception of language similarity. This work proposes LLM-based language similarity as a novel perspective for selecting effective fine-tuning languages. We construct a framework to quantify the similarity within each language pair through both the lenses of language-specific performance patterns and cross-lingual transferability, ultimately deriving three similarity score matrices. Moreover, we observe a counter-intuitive phenomenon: super-additive transfer effect, where fine-tuning on a certain language yields higher performance than fine-tuning directly on the target language. Additionally, due to the absence of an existing dataset meeting our experimental requirements, we construct and release M4CQ-Pro dataset, which features domain-diverse distribution of 135 tasks and content consistency across 31 languages (including over 20 medium- and low-resource languages), with 61518 manually reviewed high-quality questions per language. We evaluate our approach on representative multilingual LLMs and results show that all three LLM-based similarity measures effectively guide fine-tuning language selection, outperforming traditional linguistic similarity, with the integrated measure achieving the best results. Our approach provides not only a novel perspective on language similarity, but also practical baselines for selecting fine-tuning languages.},
}
Markdown (Informal)
[How to Improve LLMs’ Performance on Specific Languages: A Perspective on LLM-Derived Language Similarity](https://preview.aclanthology.org/ingest-acl/2026.acl-long.691/) (Shi et al., ACL 2026)
ACL