@inproceedings{poladi-dandapat-2025-comparing,
    title = "Comparing Language Models of Different Scales for Security-Focused Tabular Query Generation and Reasoning",
    author = "Poladi, Varivashya and
      Dandapat, Sandipan",
    editor = "Inui, Kentaro and
      Sakti, Sakriani and
      Wang, Haofen and
      Wong, Derek F. and
      Bhattacharyya, Pushpak and
      Banerjee, Biplab and
      Ekbal, Asif and
      Chakraborty, Tanmoy and
      Singh, Dhirendra Pratap",
    booktitle = "Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics",
    month = dec,
    year = "2026",
    address = "Mumbai, India",
    publisher = "The Asian Federation of Natural Language Processing and The Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-ijcnlp-aacl/2025.ijcnlp-long.55/",
    pages = "1002--1016",
    isbn = "979-8-89176-298-5",
    abstract = "Security-related data often exists in complex, multi-table formats and is scarce due to privacy and compliance constraints{---}posing a major challenge for training and evaluating language models (LMs) on security reasoning tasks. In this work, we systematically investigate the performance of large language models (LLMs) across different parameter scales in generating and solving multi-step, semantically rich queries over realistic security scenarios represented through three interlinked tabular datasets. We assess models on three key axes (i) their ability to formulate insightful, high complexity security questions; (ii) the quality and coherence of their reasoning chains; and (iii) their accuracy in deriving actionable answers from the underlying data. To address data scarcity, we propose a diffusion-based synthetic data generation pipeline that amplifies the existing dataset while preserving domain semantics and statistical structure. Our findings reveal that while large models often outperform in reasoning depth and query formulation, smaller models show surprising efficiency and accuracy. The study provides actionable insights for deploying generative models in security analytics and opens avenues for synthetic data-driven evaluation of LLMs in low-resource, high-stakes domains."
}
@comment{Markdown (Informal):
[Comparing Language Models of Different Scales for Security-Focused Tabular Query Generation and Reasoning](https://preview.aclanthology.org/ingest-ijcnlp-aacl/2025.ijcnlp-long.55/) (Poladi & Dandapat, IJCNLP-AACL 2025)
ACL}