% EMNLP 2025 main-conference paper, ACL Anthology ID 2025.emnlp-main.1319.
@inproceedings{han-etal-2025-rule,
  title     = {Rule Discovery for Natural Language Inference Data Generation Using Out-of-Distribution Detection},
  author    = {Han, Juyoung and
               Hwang, Hyunsun and
               Lee, Changki},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.emnlp-main.1319/},
  doi       = {10.18653/v1/2025.emnlp-main.1319},
  pages     = {25982--26002},
  isbn      = {979-8-89176-332-6},
  abstract  = {Natural Language Inference (NLI) is a fundamental task in Natural Language Processing (NLP), yet adapting NLI models to new domains remains challenging due to the high cost of collecting domain-specific training data. While prior work proposed 15 sentence transformation rules to automate training data generation, these rules insufficiently capture the diversity of natural language. We propose a novel framework that combines Out-of-Distribution (OOD) detection and BERT-based clustering to identify premise{--}hypothesis pairs in the SNLI dataset that are not covered by existing rules and to discover four new transformation rules from them. Using these rules with Chain-of-Thought (CoT) prompting and Large Language Models (LLMs), we generate high-quality training data and augment the SNLI dataset. Our method yields consistent performance improvements across dataset sizes, achieving +0.85{\%}p accuracy on 2k and +0.15{\%}p on 550k samples. Furthermore, a distribution-aware augmentation strategy enhances performance across all scales. Beyond manual explanations, we extend our framework to automatically generated explanations (CoT-Ex), demonstrating that they provide a scalable alternative to human-written explanations and enable reliable rule discovery.},
}
Markdown (Informal)
[Rule Discovery for Natural Language Inference Data Generation Using Out-of-Distribution Detection](https://aclanthology.org/2025.emnlp-main.1319/) (Han et al., EMNLP 2025)
ACL