@inproceedings{michail-etal-2025-examining,
title = "Examining Multilingual Embedding Models Cross-Lingually Through {LLM}-Generated Adversarial Examples",
author = "Michail, Andrianos and
Clematide, Simon and
Sennrich, Rico",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/name-variant-enfa-fane/2025.findings-emnlp.115/",
doi = "10.18653/v1/2025.findings-emnlp.115",
pages = "2161--2170",
ISBN = "979-8-89176-335-7",
abstract = "The evaluation of cross-lingual semantic search models is often limited to existing datasets from tasks such as information retrieval and semantic textual similarity. We introduce Cross-Lingual Semantic Discrimination (CLSD), a lightweight evaluation task that requires only parallel sentences and a Large Language Model (LLM) to generate adversarial distractors. CLSD measures an embedding model{'}s ability to rank the true parallel sentence above semantically misleading but lexically similar alternatives. As a case study, we construct CLSD datasets for German{--}French in the news domain. Our experiments show that models fine-tuned for retrieval tasks benefit from pivoting through English, whereas bitext mining models perform best in direct cross-lingual settings. A fine-grained similarity analysis further reveals that embedding models differ in their sensitivity to linguistic perturbations."
}