% Frydenlund (EMNLP 2024). The url field uses the canonical ACL Anthology
% address for paper ID 2024.emnlp-main.695 (the same ID as the DOI suffix)
% rather than a preview-build link.
@inproceedings{frydenlund-2024-mystery,
    title     = {The Mystery of the Pathological Path-star Task for Language Models},
    author    = {Frydenlund, Arvid},
    editor    = {Al-Onaizan, Yaser and
                 Bansal, Mohit and
                 Chen, Yun-Nung},
    booktitle = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing},
    month     = nov,
    year      = {2024},
    address   = {Miami, Florida, USA},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2024.emnlp-main.695/},
    doi       = {10.18653/v1/2024.emnlp-main.695},
    pages     = {12493--12516},
    abstract  = {The recently introduced path-star task is a minimal task designed to exemplify limitations to the abilities of language models (Bachmann and Nagarajan, 2024). It involves a path-star graph where multiple arms radiate from a single starting node and each node is unique. Given the start node and a specified target node that ends an arm, the task is to generate the arm containing that target node. This is straightforward for a human but surprisingly difficult for language models, which did not outperform the random baseline. The authors hypothesized this is due to a deficiency in teacher-forcing and the next-token prediction paradigm. We demonstrate the task is learnable using teacher-forcing in alternative settings and that the issue is partially due to representation. We introduce a regularization method using structured samples of the same graph but with differing target nodes, improving results across a variety of model types. We provide RASP proofs showing the task is theoretically solvable. Finally, we find settings where an encoder-only model can consistently solve the task.},
}
Markdown (Informal)
[The Mystery of the Pathological Path-star Task for Language Models](https://aclanthology.org/2024.emnlp-main.695/) (Frydenlund, EMNLP 2024)
ACL