@inproceedings{marreira-etal-2026-rating,
  title     = {Rating--Text Mismatch in {Brazilian} {Portuguese} Reviews: How Reliable Are Zero-Shot {LLMs}?},
  author    = {Marreira, Emanuelle and
               Figueiredo, Carlos M. S. and
               de Melo, Tiago},
  editor    = {Souza, Marlo and
               de-Dios-Flores, Iria and
               Santos, Diana and
               Freitas, Larissa and
               Souza, Jackson Wilke da Cruz and
               Ribeiro, Eug{\'e}nio},
  booktitle = {Proceedings of the 17th International Conference on Computational Processing of {Portuguese} ({PROPOR} 2026) - Vol. 1},
  month     = apr,
  year      = {2026},
  address   = {Salvador, Brazil},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-dnd/2026.propor-1.96/},
  pages     = {959--967},
  isbn      = {979-8-89176-387-6},
  abstract  = {This study evaluates the ability of large language models (LLMs) to detect incoherence between the text of product reviews and their assigned rating (1 or 5 stars). Using popular LLMs such as GPT-5, Llama-4 and DeepSeek-3.2, and models optimized for Brazilian Portuguese, Sabi{\'a}-3.1 and Bode-3.1, we show that some are capable of detecting incoherence among texts and ratings (F1 {\ensuremath{>}} 90{\%}) in a zero-shot protocol. Models also present a high agreement in the predictions, where several prediction rounds led to low variability (Fleiss' {\ensuremath{\kappa}}{\ensuremath{>}} 0.95). With the demonstrated incoherence present in all product categories (aprox. 10{\%} of comments), the results suggest that LLMs are very promising to perform this high semantic interpretation task, and they can be used as valuable tools for online monitoring and recommendation systems.},
}
@comment{Markdown (Informal):
[Rating–Text Mismatch in Brazilian Portuguese Reviews: How Reliable Are Zero-Shot LLMs?](https://preview.aclanthology.org/ingest-dnd/2026.propor-1.96/) (Marreira et al., PROPOR 2026)
ACL
}