@comment{COLING 2025 main-conference paper; ACL Anthology ID 2025.coling-main.427.}
@inproceedings{munoz-ortiz-etal-2025-evaluating,
  title     = {Evaluating Pixel Language Models on Non-Standardized Languages},
  author    = {Mu{\~n}oz-Ortiz, Alberto and
               Blaschke, Verena and
               Plank, Barbara},
  editor    = {Rambow, Owen and
               Wanner, Leo and
               Apidianaki, Marianna and
               Al-Khalifa, Hend and
               Di Eugenio, Barbara and
               Schockaert, Steven},
  booktitle = {Proceedings of the 31st International Conference on Computational Linguistics},
  month     = jan,
  year      = {2025},
  address   = {Abu Dhabi, UAE},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.coling-main.427/},
  pages     = {6412--6419},
  abstract  = {We explore the potential of pixel-based models for transfer learning from standard languages to dialects. These models convert text into images that are divided into patches, enabling a continuous vocabulary representation that proves especially useful for out-of-vocabulary words common in dialectal data. Using German as a case study, we compare the performance of pixel-based models to token-based models across various syntactic and semantic tasks. Our results show that pixel-based models outperform token-based models in part-of-speech tagging, dependency parsing and intent detection for zero-shot dialect evaluation by up to 26 percentage points in some scenarios, though not in Standard German. However, pixel-based models fall short in topic classification. These findings emphasize the potential of pixel-based models for handling dialectal data, though further research should be conducted to assess their effectiveness in various linguistic contexts.},
}
Markdown (Informal)
[Evaluating Pixel Language Models on Non-Standardized Languages](https://aclanthology.org/2025.coling-main.427/) (Muñoz-Ortiz et al., COLING 2025)
ACL