% Durward and Thomson (2024): workshop paper in the BEA 2024 proceedings,
% ACL Anthology ID 2024.bea-1.22.
% The url field uses the canonical Anthology address; preview-branch mirrors
% (preview.aclanthology.org/<branch>/...) are temporary and should not be cited.
@inproceedings{durward-thomson-2024-evaluating,
  title     = {Evaluating Vocabulary Usage in {LLM}s},
  author    = {Durward, Matthew and
               Thomson, Christopher},
  editor    = {Kochmar, Ekaterina and
               Bexte, Marie and
               Burstein, Jill and
               Horbach, Andrea and
               Laarmann-Quante, Ronja and
               Tack, Ana{\"i}s and
               Yaneva, Victoria and
               Yuan, Zheng},
  booktitle = {Proceedings of the 19th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2024)},
  month     = jun,
  year      = {2024},
  address   = {Mexico City, Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.bea-1.22/},
  pages     = {266--282},
  abstract  = {The paper focuses on investigating vocabulary usage for AI and human-generated text. We define vocabulary usage in two ways: structural differences and keyword differences. Structural differences are evaluated by converting text into Vocabulary-Management Profiles, initially used for discourse analysis. Through VMPs, we can treat the text data as a time series, allowing an evaluation by implementing Dynamic time-warping distance measures and subsequently deriving similarity scores to provide an indication of whether the structural dynamics in AI texts resemble human texts. To analyze keywords, we use a measure that emphasizes frequency and dispersion to source {\textquoteleft}key' keywords. A qualitative approach is then applied, noting thematic differences between human and AI writing.},
}
Markdown (Informal)
[Evaluating Vocabulary Usage in LLMs](https://aclanthology.org/2024.bea-1.22/) (Durward & Thomson, BEA 2024)
ACL
- Matthew Durward and Christopher Thomson. 2024. Evaluating Vocabulary Usage in LLMs. In Proceedings of the 19th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2024), pages 266–282, Mexico City, Mexico. Association for Computational Linguistics.