@inproceedings{bertolini-etal-2022-testing,
title = "Testing Large Language Models on Compositionality and Inference with Phrase-Level Adjective-Noun Entailment",
author = "Bertolini, Lorenzo and
Weeds, Julie and
Weir, David",
editor = "Calzolari, Nicoletta and
Huang, Chu-Ren and
Kim, Hansaem and
Pustejovsky, James and
Wanner, Leo and
Choi, Key-Sun and
Ryu, Pum-Mo and
Chen, Hsin-Hsi and
Donatelli, Lucia and
Ji, Heng and
Kurohashi, Sadao and
Paggio, Patrizia and
Xue, Nianwen and
Kim, Seokhwan and
Hahm, Younggyun and
He, Zhong and
Lee, Tony Kyungil and
Santus, Enrico and
Bond, Francis and
Na, Seung-Hoon",
booktitle = "Proceedings of the 29th International Conference on Computational Linguistics",
month = oct,
year = "2022",
address = "Gyeongju, Republic of Korea",
publisher = "International Committee on Computational Linguistics",
url = "https://preview.aclanthology.org/add-emnlp-2024-awards/2022.coling-1.359/",
pages = "4084--4100",
abstract = "Previous work has demonstrated that pre-trained large language models (LLM) acquire knowledge during pre-training which enables reasoning over relationships between words (e.g, hyponymy) and more complex inferences over larger units of meaning such as sentences. Here, we investigate whether lexical entailment (LE, i.e. hyponymy or the is a relation between words) can be generalised in a compositional manner. Accordingly, we introduce PLANE (Phrase-Level Adjective-Noun Entailment), a new benchmark to test models on fine-grained compositional entailment using adjective-noun phrases. Our experiments show that knowledge extracted via In{--}Context and transfer learning is not enough to solve PLANE. However, a LLM trained on PLANE can generalise well to out{--}of{--}distribution sets, since the required knowledge can be stored in the representations of subwords (SW) tokens."
}