@inproceedings{tashu-tudor-2025-mapping,
title = "Mapping Cross-Lingual Sentence Representations for Low-Resource Language Pairs Using Pre-trained Language Models",
author = "Tashu, Tsegaye Misikir and
Tudor, Andreea Ioana",
editor = "Hettiarachchi, Hansi and
Ranasinghe, Tharindu and
Rayson, Paul and
Mitkov, Ruslan and
Gaber, Mohamed and
Premasiri, Damith and
Tan, Fiona Anting and
Uyangodage, Lasitha",
booktitle = "Proceedings of the First Workshop on Language Models for Low-Resource Languages",
month = jan,
year = "2025",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/add-emnlp-2024-awards/2025.loreslm-1.20/",
pages = "249--257",
abstract = "In this work, we explore different linear mapping techniques to learn cross-lingual document representations from pre-trained multilingual large language models for low-resource languages. Three different mapping techniques namely Linear Concept Approximation (LCA), Linear Concept Compression (LCC), and Neural Concept Approximation (NCA) and four multilingual language models such as mBERT, mT5, XLM-R, and ErnieM were used to extract embeddings. The inter-lingual representations were created mappings the monolingual representation extracted from multilingual language models. The experimental results showed that LCA and LCC significantly outperform NCA, with models like ErnieM achieving the highest alignment quality. Language pairs exhibit variable performance, influenced by linguistic similarity and data availability, with the Amharic-English pair yielding particularly high scores. The results showed the utility of LCA and LCC in enabling cross-lingual tasks for low-resource languages."
}
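
To make the abstract's core idea concrete, below is a minimal sketch of a generic linear cross-lingual mapping: given parallel sentence embeddings from a multilingual encoder, fit a linear map by least squares and score alignment by nearest-neighbour retrieval. This is an illustrative baseline only, not the paper's LCA, LCC, or NCA implementations; the function names, the 768-dimensional embeddings, and the random stand-in data are all assumptions.

```python
import numpy as np

def fit_linear_map(X: np.ndarray, Y: np.ndarray) -> np.ndarray:
    """Least-squares solution W minimising ||XW - Y||_F (a generic
    linear-mapping baseline, not the paper's LCA/LCC/NCA)."""
    W, *_ = np.linalg.lstsq(X, Y, rcond=None)
    return W

def precision_at_1(X: np.ndarray, Y: np.ndarray, W: np.ndarray) -> float:
    """How often a mapped source embedding's nearest target neighbour
    (by cosine similarity) is its true parallel sentence."""
    mapped = X @ W
    mapped /= np.linalg.norm(mapped, axis=1, keepdims=True)
    targets = Y / np.linalg.norm(Y, axis=1, keepdims=True)
    sims = mapped @ targets.T  # pairwise cosine similarities
    return float(np.mean(sims.argmax(axis=1) == np.arange(len(X))))

# Random stand-ins for embeddings that would come from a multilingual
# encoder such as mBERT or XLM-R (768 dims assumed, as in BERT-base).
rng = np.random.default_rng(0)
X = rng.standard_normal((500, 768))                     # "source" sentences
Y = X @ rng.standard_normal((768, 768)) / np.sqrt(768)  # linear "target" view
Y += 0.05 * rng.standard_normal(Y.shape)                # plus a little noise
W = fit_linear_map(X[:400], Y[:400])                    # fit on a train split
print(f"P@1 on held-out pairs: {precision_at_1(X[400:], Y[400:], W):.2f}")
```

In practice, X and Y would hold sentence embeddings of translation pairs extracted from the multilingual models named in the abstract, and retrieval precision would serve as one possible measure of the alignment quality the paper compares across techniques.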