@inproceedings{heierli-etal-2025-evaluating,
title = "Evaluating {LLM} Capabilities in Low-Resource Contexts: A Case Study of {P}ersian Linguistic and Cultural Tasks",
author = "Heierli, Jasmin and
Ganjineh, Rebecca Bahar and
Gavagnin, Elena",
editor = "Estevanell-Valladares, Ernesto Luis and
Picazo-Izquierdo, Alicia and
Ranasinghe, Tharindu and
Mikaberidze, Besik and
Ostermann, Simon and
Gurgurov, Daniil and
Mueller, Philipp and
Borg, Claudia and
{\v{S}}imko, Mari{\'a}n",
booktitle = "Proceedings of the First Workshop on Advancing NLP for Low-Resource Languages",
month = sep,
year = "2025",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2025.lowresnlp-1.12/",
pages = "111--120",
abstract = "We evaluate four representative large language models, namely GPT-4o, Gemini, Llama, and DeepSeek on a suite of linguistic and cultural tasks in Persian, covering grammar, paraphrasing, inference, translation, factual recall, analogical reasoning, and a Hofstede-based cultural probe under direct and role-based prompts. Our findings reveal consistent performance declines, alongside systematic misalignment with Iranian cultural norms. Role-based prompting yields modest improvements but does not fully restore cultural fidelity. We conclude that advancing truly multilingual models demands richer Persian resources, targeted adaptation, and evaluation frameworks that jointly assess fluency and cultural alignment."
}