% Workshop paper from TrustNLP 2023 (ACL Anthology ID 2023.trustnlp-1.12).
% The url field uses the canonical aclanthology.org address; the doi is the
% preferred persistent identifier.
@inproceedings{lee-etal-2023-linguistic,
  author    = {Lee, Bruce W. and
               Arockiaraj, Benedict Florance and
               Jin, Helen},
  title     = {Linguistic Properties of Truthful Response},
  editor    = {Ovalle, Anaelia and
               Chang, Kai-Wei and
               Mehrabi, Ninareh and
               Pruksachatkun, Yada and
               Galstyan, Aram and
               Dhamala, Jwala and
               Verma, Apurv and
               Cao, Trista and
               Kumar, Anoop and
               Gupta, Rahul},
  booktitle = {Proceedings of the 3rd Workshop on Trustworthy Natural Language Processing ({TrustNLP} 2023)},
  month     = jul,
  year      = {2023},
  address   = {Toronto, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {135--140},
  doi       = {10.18653/v1/2023.trustnlp-1.12},
  url       = {https://aclanthology.org/2023.trustnlp-1.12/},
  abstract  = {We investigate the phenomenon of an {LLM}{'}s untruthful response using a large set of 220 handcrafted linguistic features. We focus on {GPT-3} models and find that the linguistic profiles of responses are similar across model sizes. That is, how varying-sized {LLMs} respond to given prompts stays similar on the linguistic properties level. We expand upon this finding by training support vector machines that rely only upon the stylistic components of model responses to classify the truthfulness of statements. Though the dataset size limits our current findings, we present promising evidence that truthfulness detection is possible without evaluating the content itself. We release our code and raw data.},
}
Markdown (Informal)
[Linguistic Properties of Truthful Response](https://aclanthology.org/2023.trustnlp-1.12/) (Lee et al., TrustNLP 2023)
ACL
- Bruce W. Lee, Benedict Florance Arockiaraj, and Helen Jin. 2023. Linguistic Properties of Truthful Response. In Proceedings of the 3rd Workshop on Trustworthy Natural Language Processing (TrustNLP 2023), pages 135–140, Toronto, Canada. Association for Computational Linguistics.