@inproceedings{du-2025-titletrap,
title = "{T}itle{T}rap: Probing Presentation Bias in {LLM}-Based Scientific Reviewing",
author = "Du, Shurui",
editor = "Akter, Mousumi and
Chowdhury, Tahiya and
Eger, Steffen and
Leiter, Christoph and
Opitz, Juri and
{\c{C}}ano, Erion",
booktitle = "Proceedings of the 5th Workshop on Evaluation and Comparison of NLP Systems",
month = dec,
year = "2025",
address = "Mumbai, India",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.eval4nlp-1.10/",
pages = "119--125",
ISBN = "979-8-89176-305-0",
abstract = "Large language models (LLMs) are now used in scientific peer review, but their judgments can still be influenced by how information is presented. We study how the style of a paper{'}s title affects the way LLMs score scientific work. To control for content variation, we build the TitleTrap benchmark using abstracts generated by a language model for common research topics in computer vision and NLP. Each abstract is paired with three titles: a branded colon style, a plain descriptive style, and an interrogative style, while the abstract text remains fixed. We ask GPT-4o and Claude to review these title{--}abstract pairs under the same instructions. Our results show that title style alone can change the scores: branded titles often receive higher ratings, while interrogative titles sometimes lead to lower assessments of rigor. These findings reveal a presentation bias in LLM-based peer review and suggest the need for better methods to reduce such bias and support fairer automated evaluation."
}