@inproceedings{ravikumar-etal-2026-lost,
    title = "Lost in Formatting: How Output Formats Skew {LLM} Performance on Information Extraction",
    author = "Ravikumar, Rishi and
      Ibrahim, Nuhu and
      Batista-Navarro, Riza",
    editor = "Demberg, Vera and
      Inui, Kentaro and
      Marquez, Llu{\'\i}s",
    booktitle = "Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)",
    month = mar,
    year = "2026",
    address = "Rabat, Morocco",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-eacl/2026.eacl-long.256/",
    pages = "5498--5513",
    isbn = "979-8-89176-380-7",
    abstract = "We investigate how the choice of output format influences the performance of fine-tuned large language models on information extraction tasks. Based on over 280 experiments spanning multiple benchmarks, models and formats, we find that output formatting is a critical yet largely overlooked hyperparameter. Remarkably, in some cases, changing only the output format shifts F1 scores by over 40{\%} despite using the same model. We further observe that no single format consistently dominates across settings, and the optimal choice depends on factors like model family and dataset characteristics. Overall, these results demonstrate that informationally equivalent output formats can produce substantial performance variation, highlighting the need to treat output formatting as a key factor in building accurate and reliable information extraction systems.",
}
Markdown (Informal)
[Lost in Formatting: How Output Formats Skew LLM Performance on Information Extraction](https://preview.aclanthology.org/ingest-eacl/2026.eacl-long.256/) (Ravikumar et al., EACL 2026)
ACL