@inproceedings{bago-bakaric-2025-shot,
    title = "Few-Shot Prompting, Full-Scale Confusion: Evaluating Large Language Models for Humor Detection in {C}roatian Tweets",
    author = "Bago, Petra  and
      Bakari{\'c}, Nikola",
    editor = "Piskorski, Jakub  and
      P{\v{r}}ib{\'a}{\v{n}}, Pavel  and
      Nakov, Preslav  and
      Yangarber, Roman  and
      Marcinczuk, Michal",
    booktitle = "Proceedings of the 10th Workshop on Slavic Natural Language Processing (Slavic NLP 2025)",
    month = jul,
    year = "2025",
    address = "Vienna, Austria",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-emnlp/2025.bsnlp-1.2/",
    doi = "10.18653/v1/2025.bsnlp-1.2",
    pages = "9--16",
    ISBN = "978-1-959429-57-9",
    abstract = "Humor detection in low-resource languages is hampered by cultural nuance and subjective annotation. We test two large language models, GPT-4 and Gemini 2.5 Flash, on labeling humor in 6,000 Croatian tweets with expert gold labels generated through a rigorous annotation pipeline. LLM{--}human agreement ({\ensuremath{\kappa}} = 0.28) matches human{--}human agreement ({\ensuremath{\kappa}} = 0.27), while LLM{--}LLM agreement is substantially higher ({\ensuremath{\kappa}} = 0.63). Although concordance with expert adjudication is lower, additional metrics imply that the models equal a second human annotator while working far faster and at negligible cost. These findings suggest that, even with simple prompting, LLMs can efficiently bootstrap subjective datasets and serve as practical annotation assistants in linguistically under-represented settings."
}