% WildIFEval paper, Fifth Workshop on Generation, Evaluation and Metrics (GEM), 2026.
% NOTE(review): the url field points at a preview.aclanthology.org ingest path;
% confirm and switch to the canonical Anthology URL once the volume is published.
@inproceedings{lior-etal-2026-wildifeval,
  title     = {{WildIFEval}: Instruction Following in the Wild},
  author    = {Lior, Gili and
               Yehudai, Asaf and
               Gera, Ariel and
               Ein-Dor, Liat},
  editor    = {Mille, Simon and
               Gehrmann, Sebastian and
               Schmidtov{\'a}, Patr{\'i}cia and
               Du{\v{s}}ek, Ond{\v{r}}ej and
               Fadaee, Marzieh and
               Lo, Kyle and
               Santus, Enrico and
               Stanovsky, Gabriel},
  booktitle = {Proceedings of the Fifth Workshop on Generation, Evaluation and Metrics ({GEM})},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.gem-main.66/},
  pages     = {745--778},
  isbn      = {979-8-89176-423-1},
  abstract  = {Recent LLMs have shown remarkable success in following user instructions, yet handling instructions with multiple constraints remains a significant challenge. In this work, we introduce WildIFEval - a large-scale dataset of $7K$ real user instructions for single-turn constrained text generation, exhibiting diverse, multi-constraint conditions. Unlike prior datasets, our collection spans a broad lexical and topical spectrum of constraints, extracted from natural user instructions. We categorize these constraints into eight high-level classes to capture their distribution and co-occurrence dynamics in real-world scenarios. Leveraging WildIFEval, we conduct extensive experiments to benchmark the instruction-following capabilities of leading LLMs. WildIFEval clearly differentiates between small and large models, and demonstrates that all models have room for improvement on such tasks. Our analysis reveals that as constraint count grows, models' overall success drops sharply while per-constraint success remains stable, indicating a capacity bottleneck in juggling multiple constraints, and that models struggle more with rigid form-based constraints than with softer content-based ones. We release our dataset to promote further research on instruction-following under complex, realistic conditions.},
}
Markdown (Informal)
[WildIFEval: Instruction Following in the Wild](https://preview.aclanthology.org/ingest-acl-workshops/2026.gem-main.66/) (Lior et al., GEM 2026)
ACL
- Gili Lior, Asaf Yehudai, Ariel Gera, and Liat Ein-Dor. 2026. WildIFEval: Instruction Following in the Wild. In Proceedings of the Fifth Workshop on Generation, Evaluation and Metrics (GEM), pages 745–778, San Diego, California, USA. Association for Computational Linguistics.