@comment{Conference paper in the Findings of ACL 2026 proceedings. The address field holds the location string exactly as given by the source export.}
@inproceedings{cheng-etal-2026-circuitsynth,
  title     = {{CircuitSynth}: Reliable Synthetic Data Generation},
  author    = {Cheng, Zehua and
               Dai, Wei and
               Sun, Jiahao and
               Lukasiewicz, Thomas},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1770/},
  pages     = {35542--35552},
  isbn      = {979-8-89176-395-1},
  abstract  = {The generation of high-fidelity synthetic data is a cornerstone of modern machine learning, yet Large Language Models (LLMs) frequently suffer from hallucinations, logical inconsistencies, and mode collapse when tasked with structured generation. Existing approaches, such as prompting or retrieval-augmented generation, lack the mechanisms to balance linguistic expressivity with formal guarantees regarding validity and coverage. To address this, we propose CircuitSynth, a novel neuro-symbolic framework that decouples semantic reasoning from surface realization. By distilling the reasoning capabilities of a Teacher LLM into a Probabilistic Sentential Decision Diagram (PSDD), CircuitSynth creates a tractable semantic prior that structurally enforces hard logical constraints. Furthermore, we introduce a convex optimization mechanism to rigorously satisfy soft distributional goals. Empirical evaluations across diverse benchmarks demonstrate that CircuitSynth achieves 100{\%} Schema Validity even in complex logic puzzles where unconstrained baselines fail (12.4{\%}) while significantly outperforming state-of-the-art methods in rare-combination coverage.},
}
Markdown (Informal)
[CircuitSynth: Reliable Synthetic Data Generation](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1770/) (Cheng et al., Findings 2026)
ACL
- Zehua Cheng, Wei Dai, Jiahao Sun, and Thomas Lukasiewicz. 2026. CircuitSynth: Reliable Synthetic Data Generation. In Findings of the Association for Computational Linguistics: ACL 2026, pages 35542–35552, San Diego, California, United States. Association for Computational Linguistics.