% Conference paper (LREC 2026, main volume): typed as @inproceedings with the
% venue name in `booktitle` rather than as a journal article.
% NOTE(review): `booktitle` carries the venue name exactly as exported; replace
%   it with the full proceedings title once that is confirmed.
% NOTE(review): `url` points at an ACL Anthology preview/ingest host; confirm
%   the permanent URL (and add a DOI if one is assigned).
% NOTE(review): publisher spelling is kept as exported; verify against the
%   publisher's official name.
@inproceedings{bagdasarov-alves-2026-beyond,
  title     = {Beyond Lemmas and Syntax: Comparing Human and {LLM}-Generated Scientific Abstracts},
  author    = {Bagdasarov, Sergei and
               Alves, Diego},
  editor    = {Piperidis, Stelios and
               Bel, N{\'u}ria and
               van den Heuvel, Henk and
               Ide, Nancy and
               Krek, Simon and
               Toral, Antonio},
  booktitle = {International Conference on Language Resources and Evaluation},
  month     = may,
  year      = {2026},
  address   = {Palma de Mallorca, Spain},
  publisher = {ELRA Language Resource Association},
  pages     = {3823--3832},
  url       = {https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.304/},
  anthology-volume = {main},
  abstract  = {In this study, we compare human-written (HWT) and machine-generated (MGT) abstracts of scientific papers, going beyond traditional lexical and syntactic analyses. We use an extensive corpus of publications on computational linguistics submitted to the Association of Computational Linguistics from mid 1950s to 2022. First, we generate abstracts with three state-of-the-art models (GPT-4o, Llama 3.1 and Qwen 2.5), providing the models with full texts of papers, and subsequently we compare these abstracts to those written by humans. We study the overall information content of abstracts, operationalised as surprisal, and the distribution of information in abstracts quantified as local Uniform Information Density (UID), both metrics related to the processing effort. Subsequently, we perform an extrinsic evaluation through topic modelling and clustering applying the BERTopic model. Our results show significant differences both in surprisal and UID, suggesting that abstracts generated by Llama are less cognitively demanding and show a more uniform distribution of information. Our topic modelling experiments show greater divergence between humans and LLMs than between LLM pairs. At the same time, Llama abstracts seem to be more semantically similar to those written by humans, standing in line with previous findings suggesting such similarity on lexical and syntactic level.},
}
Markdown (Informal)
[Beyond Lemmas and Syntax: Comparing Human and LLM-Generated Scientific Abstracts](https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.304/) (Bagdasarov & Alves, LREC 2026)
ACL