@inproceedings{harrison-walker-2026-cross,
  author    = {Harrison, Davan and
               Walker, Marilyn},
  title     = {Cross-Domain Semantic Fidelity Evaluation for Meaning-to-Text Generation},
  editor    = {Mille, Simon and
               Gehrmann, Sebastian and
               Schmidtov{\'a}, Patr{\'i}cia and
               Du{\v{s}}ek, Ond{\v{r}}ej and
               Fadaee, Marzieh and
               Lo, Kyle and
               Santus, Enrico and
               Stanovsky, Gabriel},
  booktitle = {Proceedings of the Fifth Workshop on Generation, Evaluation and Metrics ({GEM})},
  year      = {2026},
  month     = jul,
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {443--455},
  isbn      = {979-8-89176-423-1},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.gem-main.41/},
  abstract  = {Slot Error Rate (SER) is the standard metric for evaluating semantic accuracy in meaning-to-text generation, but computing it has historically required domain-specific scripts that do not generalize across datasets. We present a cross-domain SER evaluation framework that replaces hand-crafted rules with a learned slot extraction model. We adapt Llama-3.2-3B-Instruct with LoRA, updating only 0.34{\%} of its parameters, and show that this small adapted model outperforms prompted frontier LLMs by a wide margin on structured extraction across 23 dialogue domains. We further apply overgenerate-and-rank to the extraction task itself, generating multiple candidate meaning representations and selecting the best one with a trained ranker, which improves SER-Accuracy from 75{\%} to 88{\%}. We combine the extraction model with a Natural Language Inference (NLI) verification baseline through learned per-example routing, achieving 90.0{\%} accuracy on held-out evaluation pairs without any domain-specific rule engineering. We compare our framework against published rule-based SER tools and show that our learned approach matches or outperforms hand-crafted scripts on all six comparable domains.},
}
Markdown (Informal)
[Cross-Domain Semantic Fidelity Evaluation for Meaning-to-Text Generation](https://preview.aclanthology.org/ingest-acl-workshops/2026.gem-main.41/) (Harrison & Walker, GEM 2026)
ACL