@comment{
  SemEval-2026 Task 8 (MTRAGEval) task-description paper, as exported from
  the ACL Anthology. Case-sensitive words in title and booktitle are
  protected with whole-word braces so sentence-casing styles keep them.
  NOTE(review): the url field points at a preview.aclanthology.org ingest
  branch; confirm whether the canonical aclanthology.org address should
  replace it once the volume is published.
}
@inproceedings{rosenthal-etal-2026-semeval,
  title     = {{SemEval}-2026 Task 8: {MTRAGEval}: Evaluating Multi-Turn {RAG} Conversations},
  author    = {Rosenthal, Sara and
               Shah, Vraj and
               Katsis, Yannis and
               Danilevsky, Marina},
  editor    = {Kochmar, Ekaterina and
               Ghosh, Debanjan and
               North, Kai and
               Komachi, Mamoru},
  booktitle = {Proceedings of the 20th {International} {Workshop} on {Semantic} {Evaluation} (2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.semeval-1.447/},
  pages     = {3673--3690},
  isbn      = {979-8-89176-414-9},
  abstract  = {We present the results and findings from SemEval Task 8: MTRAGEval. MTRAGEval measures three Retrieval Augmented Generation (RAG) subtasks: A. Retrieval, B. Generate, and C. Retrieve+Generate (full RAG) on multi-turn conversations. The task is evaluated using MTRAG-UN, a new benchmark for Multi-Turn RAG focusing on Unanswerable, Underspecified, Non-Standalone, and Unclear Questions. The MTRAGEval task attracted strong participation with 107 registered teams and 92 submissions across all subtasks, and yielded several interesting findings on effective retrieval and query rewriting techniques, the use of ensemble models, and the compounding costs of retrieval errors on downstream generation quality.},
}
Markdown (Informal)
[SemEval-2026 Task 8: MTRAGEval: Evaluating Multi-Turn RAG Conversations](https://preview.aclanthology.org/ingest-acl-workshops/2026.semeval-1.447/) (Rosenthal et al., SemEval 2026)
ACL