% TabGuard workshop paper (SURGeLLM 2026), record in ACL Anthology export format.
% Case-sensitive words in title/booktitle are protected with whole-word braces so
% sentence-casing styles keep them intact.
% NOTE(review): the url field points at a preview/ingest host -- confirm the
% canonical Anthology link once the volume is published and update it then.
@inproceedings{unnikrishnan-ma-2026-tabguard,
  title     = {{TabGuard}: Agentic {LLM} Orchestration for Adaptive Tabular Anomaly Detection via Dynamic Validator Selection and Generation},
  author    = {Unnikrishnan, Srihari and
               Ma, Minghua},
  editor    = {Gupta, Vivek and
               Ding, Kaize and
               Kokel, Harsha and
               Zhao, Yue and
               Agarwal, Amit and
               Wang, Yu and
               Glass, Michael and
               Zhang, Yu and
               Srinivas, Kavitha and
               Chen, Xiusi and
               Hassanzadeh, Oktie and
               Zhu, Qi and
               Chang, Shuaichen and
               Luo, Yuan},
  booktitle = {Proceedings of the First Workshop on Structured Understanding, Retrieval, and Generation in the {LLM} Era ({SURGeLLM} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.surgellm-1.9/},
  pages     = {152--161},
  isbn      = {979-8-89176-406-4},
  abstract  = {Tabular anomaly detection is challenging because real-world tables contain heterogeneous columns, ranging from structured identifiers to free-form text. Existing methods face a fundamental trilemma: rule-based systems require extensive manual configuration and fail on novel schemas; statistical methods scale efficiently but miss semantic errors; and LLM-based approaches understand semantics but incur prohibitive per-cell inference costs. No prior method simultaneously addresses semantic heterogeneity, domain-specific validation rules, and enterprise-scale processing. We introduce TabGuard, an agentic framework that resolves this trilemma through semantic routing. Using LLM function calling, the system analyzes a small sample of each column and dynamically selects the most effective validation strategy, routing to a regex-based validator for syntactic patterns, a code-generation validator for domain-specific rules (such as Luhn checksums for credit cards), or an embedding-based validator for distributional outliers. This architecture decouples expensive cognitive reasoning ($O(m)$ LLM calls for $m$ columns) from scalable programmatic execution, enabling deployment on enterprise datasets without per-cell inference.},
}
Markdown (Informal)
[TabGuard: Agentic LLM Orchestration for Adaptive Tabular Anomaly Detection via Dynamic Validator Selection and Generation](https://preview.aclanthology.org/ingest-acl-workshops/2026.surgellm-1.9/) (Unnikrishnan & Ma, SURGeLLM 2026)
ACL