% MazeEval conference paper (Einarsson, 2026).
% Recorded as an inproceedings entry: the record carries an editor list, a
% conference name and a venue, so the conference name lives in booktitle rather
% than journal. The anthology volume id "main" is not a bound volume number, so
% it is kept in the ignored field anthology-volume instead of volume.
% NOTE(review): the url points at a preview/ingest host -- confirm and replace
% with the permanent landing page (or a doi) once it is available.
% NOTE(review): the publisher string is kept exactly as exported; it looks
% unusual, so verify it against the proceedings front matter.
@inproceedings{einarsson-2026-mazeeval,
  author           = {Einarsson, Hafsteinn},
  title            = {{MazeEval}: A Benchmark for Testing Sequential Decision-Making in Language Models},
  editor           = {Piperidis, Stelios and Bel, N{\'u}ria and van den Heuvel, Henk and Ide, Nancy and Krek, Simon and Toral, Antonio},
  booktitle        = {International Conference on Language Resources and Evaluation},
  month            = may,
  year             = {2026},
  address          = {Palma de Mallorca, Spain},
  publisher        = {ELRA Language Resource Association},
  pages            = {407--418},
  url              = {https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.27/},
  anthology-volume = {main},
}