% Conference-paper record (ACL Anthology export, id 2026.dravidianlangtech-1.2).
% The abstract was repaired for PDF-extraction damage: line-break hyphens inside
% words were removed and missing inter-word spaces were restored.
% NOTE(review): the abstract sentence ending in "above ." is truncated in the
% exported text; the missing words are not recoverable from this record and
% should be confirmed against the published paper.
@inproceedings{rahul-adebayo-2026-beyond,
  title     = {Beyond Benchmark Accuracy: Robustness Evaluation of {Hinglish} Sentiment Models},
  author    = {Rahul, Chennuru and
               Adebayo, Kolawole},
  editor    = {Chakravarthi, Bharathi Raja and
               Priyadharshini, Ruba and
               Madasamy, Anand Kumar and
               Thavareesan, Sajeetha and
               Rajiakodi, Saranya and
               Navaneethakrishnan, Subalalitha and
               Chinnappa, Dhivya and
               Palani, Balasubramanian and
               Subramanian, Malliga and
               Shanmugavadivel, Kogilavani and
               Rajalakshmi, Ratnavel},
  booktitle = {Proceedings of the Sixth Workshop on Speech, Vision, and Language Technologies for {Dravidian} Languages},
  month     = jul,
  year      = {2026},
  address   = {Underline (Virtual)},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.dravidianlangtech-1.2/},
  pages     = {6--13},
  isbn      = {979-8-89176-401-9},
  abstract  = {Multilingual transformers have achieved remarkable performance on code-mixed sentiment benchmarks, but their robustness under linguistic stress and domain shift remains underexplored. We fine-tune XLM-RoBERTa and mBERT on a carefully cleaned 25,543-tweet Hinglish sentiment dataset, where XLM-R achieves near-perfect in-distribution accuracy (99.7{\%}). The integrity of this result is confirmed by rigorous hash-based and 3-gram Jaccard deduplication, ruling out data leakage. However, when evaluated on a 400-example human-validated adversarial benchmark spanning negation, sarcasm, contrast, subtle sentiment, and true neutral, XLM-R performance collapses to 42.5{\%} {--} a drop of over 57 percentage points. Zero-shot transfer to English TweetEval yields only 50.8{\%} accuracy (40.8{\%} macro F1), above . Our results highlight a critical gap between benchmark scores and real-world reliability, underscoring the need for adversarial evaluation and cross-domain stress-testing before deploying sentiment models in practical, safety-sensitive applications.},
}
Markdown (Informal)
[Beyond Benchmark Accuracy: Robustness Evaluation of Hinglish Sentiment Models](https://preview.aclanthology.org/ingest-acl-workshops/2026.dravidianlangtech-1.2/) (Rahul & Adebayo, DravidianLangTech 2026)
ACL