% OmniOData conference paper (Bai et al., ACL 2026).
% NOTE(review): the url below points at an ACL Anthology preview/ingest build;
% confirm the final canonical URL before relying on it.
@inproceedings{bai-etal-2026-omniodata,
  title     = {{OmniOData}: Unleashing Small Language Models for {OData} Query Generation with Synthetic Data and Reinforcement Learning},
  author    = {Bai, Tao and
               Li, Zhaochen and
               Shao, Hongxin and
               Dahlmeier, Daniel},
  editor    = {Li, Yunyao and
               Rehm, Georg and
               Tu, Mei},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} ({ACL} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-industry.119/},
  pages     = {1738--1754},
  isbn      = {979-8-89176-394-4},
  abstract  = {Despite the success of Large Language Models (LLMs) in structured query generation, OData{---}a critical RESTful protocol for enterprise APIs{---}remains under-researched due to a lack of high-fidelity, execution-validated datasets. To bridge this gap, we introduce OmniOData, a framework that generates SynOData, the first large-scale OData corpus featuring execution-grounded queries and reasoning traces. Using this corpus, we develop OmniOData-R1 (1.5B{--}3B parameters), a family of models that match or surpass frontier proprietary systems, such as GPT-4o and Gemini 3, on realistic industrial benchmarks. Our results demonstrate that the synergy of execution-verified synthetic data and Reinforcement Learning (RL) effectively unlocks the latent reasoning of Small Language Models (SLMs), providing a high-performance, low-latency solution for specialized enterprise query generation. The code and data will be released under an open-source license.},
}
Markdown (Informal)
[OmniOData: Unleashing Small Language Models for OData Query Generation with Synthetic Data and Reinforcement Learning](https://preview.aclanthology.org/ingest-acl/2026.acl-industry.119/) (Bai et al., ACL 2026)
ACL