@comment{ACL Anthology record, anthology ID 2025.ranlp-1.33 (RANLP 2025 main proceedings). The url field uses the canonical aclanthology.org address instead of a temporary preview build. The address field holds the conference location, following the ACL Anthology export convention.}
@inproceedings{debnath-etal-2025-top,
  title     = {Top Ten from Lakhs: A Transformer-based Retrieval System for Identifying Previously Fact-Checked Claims across Multiple Languages},
  author    = {Debnath, Srijani and
               Pal, Pritam and
               Das, Dipankar},
  editor    = {Angelova, Galia and
               Kunilovskaya, Maria and
               Escribe, Marie and
               Mitkov, Ruslan},
  booktitle = {Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era},
  month     = sep,
  year      = 2025,
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd., Shoumen, Bulgaria},
  url       = {https://aclanthology.org/2025.ranlp-1.33/},
  pages     = {268--274},
  abstract  = {The efficient identification of previously fact-checked claims across multiple languages is a challenging task. It can be time-consuming for professional fact-checkers even within a single language. It becomes much more difficult to perform manually when the claim and the fact-check may be in different languages. This paper presents a systematic approach for the retrieval of top-k relevant fact-checks for a given post in a monolingual and cross-lingual setup using two transformer-based fact-checked claim retrieval frameworks that share a common preprocessing pipeline but differ in their underlying encoder implementations: TIDE, a TensorFlow-based custom dual encoder applied to english-translated data, and PTEX, a PyTorch-based encoder operating on both english-translated and original-language inputs, and introduces a lightweight post-processing technique based on a textual feature: Keyword Overlap Count applied via reranking on top of the transformer-based frameworks. Training and evaluation on a large multilingual corpus show that the fine-tuned E5-Large-v2 model in the PTEX framework yields the best monolingual track performance, achieving an average Success@10 score of 0.8846 and the same framework model with post-processing technique achieves an average Success@10 score of 0.7393 which is the best performance in crosslingual track.},
}
Markdown (Informal)
[Top Ten from Lakhs: A Transformer-based Retrieval System for Identifying Previously Fact-Checked Claims across Multiple Languages](https://aclanthology.org/2025.ranlp-1.33/) (Debnath et al., RANLP 2025)
ACL