% Workshop paper from the LM4DH 2025 proceedings (ACL Anthology ID 2025.lm4dh-1.4).
@inproceedings{bagdasarov-alves-2025-like,
    title     = {Like a Human? A Linguistic Analysis of Human-written and Machine-generated Scientific Texts},
    author    = {Bagdasarov, Sergei and
                 Alves, Diego},
    editor    = {Arachchige, Isuri Nanomi and
                 Frontini, Francesca and
                 Mitkov, Ruslan and
                 Rayson, Paul},
    booktitle = {Proceedings of the First Workshop on Natural Language Processing and Language Models for Digital Humanities},
    month     = sep,
    year      = {2025},
    address   = {Varna, Bulgaria},
    publisher = {INCOMA Ltd., Shoumen, Bulgaria},
    url       = {https://aclanthology.org/2025.lm4dh-1.4/},
    pages     = {38--47},
    abstract  = {The purpose of this study is to analyze lexical and syntactic features in human-written texts and machine-generated texts produced by three state-of-the-art large language models: GPT-4o, Llama 3.1 and Qwen 2.5. We use Kullback-Leibler divergence to quantify the dissimilarity between humans and LLMs as well as to identify relevant features for comparison. We test the predictive power of our features using binary and multi-label random forest classifiers. The classifiers achieve robust performance of above 80{\%} for multi-label classification and above 90{\%} for binary classification. Our results point to substantial differences between human- and machine-generated texts. Human writers show higher variability in the use of syntactic resources, while LLMs score higher in lexical variability.},
}
Markdown (Informal)
[Like a Human? A Linguistic Analysis of Human-written and Machine-generated Scientific Texts](https://aclanthology.org/2025.lm4dh-1.4/) (Bagdasarov & Alves, LM4DH 2025)
ACL