@inproceedings{mazumder-etal-2025-revealing,
title = "Revealing the impact of synthetic native samples and multi-tasking strategies in {Hindi}-{English} code-mixed humour and sarcasm detection",
author = "Mazumder, Debajyoti and
Kumar, Aakash and
Patro, Jasabanta",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Ros{\'e}, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-emnlp.1308/",
doi = "10.18653/v1/2025.findings-emnlp.1308",
pages = "24077--24107",
isbn = "979-8-89176-335-7",
abstract = "In this paper, we reported our experiments with various strategies to improve code-mixed humour and sarcasm detection. Particularly, we tried three approaches: (i) native sample mixing, (ii) multi-task learning (MTL), and (iii) prompting and instruction finetuning very large multilingual language models (VMLMs). In native sample mixing, we added monolingual task samples to code-mixed training sets. In MTL learning, we relied on native and code-mixed samples of a semantically related task (hate detection in our case). Finally, in our third approach, we evaluated the efficacy of VMLMs via few-shot context prompting and instruction finetuning. Some interesting findings we got are (i) adding native samples improved humor (raising the F1-score up to 6.76{\%}) and sarcasm (raising the F1-score up to 8.64{\%}) detection, (ii) training MLMs in an MTL framework boosted performance for both humour (raising the F1-score up to 10.67{\%}) and sarcasm (increment up to 12.35{\%} in F1-score) detection, and (iii) prompting and instruction finetuning VMLMs couldn{'}t outperform the other approaches. Finally, our ablation studies and error analysis discovered the cases where our model is yet to improve. We provided our code for reproducibility."
}
Markdown (Informal)
[Revealing the impact of synthetic native samples and multi-tasking strategies in Hindi-English code-mixed humour and sarcasm detection](https://aclanthology.org/2025.findings-emnlp.1308/) (Mazumder et al., Findings 2025)
ACL