% ACL Anthology record 2026.cdl-1.18: workshop paper in the CDL 2026 proceedings.
% `address` carries only the city; the full conference venue is kept in `venue`
% (read by biblatex, ignored by classic BibTeX styles).
% NOTE(review): the url below points at a preview.aclanthology.org ingest build,
% presumably a staging copy -- confirm the canonical URL once the volume is published.
@inproceedings{poh-etal-2026-making,
  title     = {Making Synthetic Questions More Child-Directed: Prompting and Sampling Effects},
  author    = {Poh, Whitney and
               Tombolini, Michael and
               Barak, Libby},
  editor    = {Ma, Martin Ziqiao and
               Liu, Emmy and
               Liu, Jing and
               Chang, Tyler A. and
               Fourtassi, Abdellah and
               Warstadt, Alex and
               Hahn, Michael and
               Sun, Weiwei and
               Shi, Freda},
  booktitle = {Proceedings of the 1st Workshop on Computational Developmental Linguistics ({CDL})},
  month     = jul,
  year      = {2026},
  address   = {San Diego, CA},
  venue     = {Grand Hyatt Manchester San Diego, 1 Market Pl, San Diego, CA 92101},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.cdl-1.18/},
  pages     = {129--135},
  isbn      = {979-8-89176-428-6},
  abstract  = {Child-directed Speech (CDS) has been shown to better support language learning as training data for computational models. Artificially generated input aims at replicating the advantage of CDS by re-creating targeted linguistic properties. Recently, the use of questions in CDS has been suggested as a linguistic property that may entail an effective discourse structure for model training. However, previous work has shown inconsistent improvement over baseline using questions in training data. In this study, we propose a new question generation method that aligns both the generation prompts and sampling methods with properties of CDS. We show that prompt wording substantially changes whether synthetic questions match CDS on surface properties such as MLU and question type. Despite marked improvements over baseline, enhanced CDS-likeness does not translate into consistent downstream gains. Overall, our results show that the role of questions in training data is a topic worth looking further into.},
}
Markdown (Informal)
[Making Synthetic Questions More Child-Directed: Prompting and Sampling Effects](https://preview.aclanthology.org/ingest-acl-workshops/2026.cdl-1.18/) (Poh et al., CDL 2026)
ACL