% Workshop paper record (C3NLP 2026 proceedings, Association for Computational Linguistics).
% NOTE(review): the url below points at a preview/ingest host; confirm the
%   final published address before relying on it.
% NOTE(review): the abstract contains a literal "(CITATION)" placeholder where
%   the source text presumably had an in-text citation; restore it from the paper.
@inproceedings{bonilla-2026-llm,
    title     = "{LLM}-Adapted {Colombian} {Spanish} Lexicography: Proficiency Control, Hallucination, and Cultural Distortion",
    author    = "Bonilla, Johnatan E.",
    editor    = "Prabhakaran, Vinodkumar and
                 Dev, Sunipa and
                 Benotti, Luciana and
                 Hershcovich, Daniel and
                 Cao, Yong and
                 Zhou, Li and
                 Ma, Bolei and
                 Adebara, Ife",
    booktitle = "Proceedings of the 4th Workshop on Cross-Cultural Considerations in {NLP} ({C3NLP} 2026)",
    month     = jul,
    year      = "2026",
    address   = "San Diego, California, United States",
    publisher = "Association for Computational Linguistics",
    url       = "https://preview.aclanthology.org/ingest-acl-workshops/2026.c3nlp-1.5/",
    pages     = "67--75",
    isbn      = "979-8-89176-420-0",
    abstract  = "We evaluate whether open-source LLMs can produce proficiency-graded English adaptations of entries from the \textit{Diccionario de colombianismos} (DiCol), a Colombian Spanish lexicographic resource used in language teaching. Three 7{--}8B instruction-tuned models{---}Llama{~}3.1, Qwen2.5, and Mistral{---}generate Beginner, Intermediate, and Advanced translations for all 8{,}252 definitions using structured zero-shot prompts identical across levels except for the target CEFR band. Automated metrics show that Intermediate targeting collapses (73{--}83{\%} classified as Advanced by vocabulary, $\chi^2 > 705$, $p < .001$) and that Advanced outputs expand 4.9{--}8.2$\times$ relative to the source. Expert annotation of a 360-entry stratified sample ($\kappa = 0.61${--}0.68) identifies hallucination in 19{\%} of entries (Fleiss' $\kappa = 0.77$ for cultural preservation categories, 97{\%} unanimity among flagged cases). Hallucination concentrates in the Advanced condition (81{\%}, $\chi^2 = 86.6$, $p < .001$) and is associated with higher expansion ($U = 16{,}662$, $p < .001$, $r = 0.68$), manifesting primarily as generic elaboration and, in a smaller proportion, as Colombia-stereotyping and pragmatic polarity inversion. We discuss these findings through the lens of (CITATION){'}s domestication framework and describe the observed pattern as \textit{algorithmic domestication}."
}
Markdown (Informal)
[LLM-Adapted Colombian Spanish Lexicography: Proficiency Control, Hallucination, and Cultural Distortion](https://preview.aclanthology.org/ingest-acl-workshops/2026.c3nlp-1.5/) (Bonilla, C3NLP 2026)
ACL