% Workshop paper in the LaTeCH-CLfL 2024 proceedings (ACL Anthology ID 2024.latechclfl-1.16).
% The url field holds the permanent aclanthology.org address for that ID; addresses under
% preview.aclanthology.org/<branch>/ are staging copies and should not be cited.
@inproceedings{wu-etal-2024-perplexing,
  title     = {Perplexing Canon: A study on {GPT}-based perplexity of canonical and non-canonical literary works},
  author    = {Wu, Yaru and
               Bizzoni, Yuri and
               Moreira, Pascale and
               Nielbo, Kristoffer},
  editor    = {Bizzoni, Yuri and
               Degaetano-Ortlieb, Stefania and
               Kazantseva, Anna and
               Szpakowicz, Stan},
  booktitle = {Proceedings of the 8th Joint {SIGHUM} Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature ({LaTeCH-CLfL} 2024)},
  month     = mar,
  year      = {2024},
  address   = {St. Julians, Malta},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.latechclfl-1.16/},
  pages     = {172--184},
  abstract  = {This study extends previous research on literary quality by using information theory-based methods to assess the level of perplexity recorded by three large language models when processing 20th-century English novels deemed to have high literary quality, recognized by experts as canonical, compared to a broader control group. We find that canonical texts appear to elicit a higher perplexity in the models, we explore which textual features might concur to create such an effect. We find that the usage of a more heavily nominal style, together with a more diverse vocabulary, is one of the leading causes of the difference between the two groups. These traits could reflect {\textquotedblleft}strategies{\textquotedblright} to achieve an informationally dense literary style.},
}
Markdown (Informal)
[Perplexing Canon: A study on GPT-based perplexity of canonical and non-canonical literary works](https://aclanthology.org/2024.latechclfl-1.16/) (Wu et al., LaTeCH-CLfL 2024)
ACL