@inproceedings{wang-etal-2025-distribution,
title = "Distribution Prompting: Understanding the Expressivity of Language Models Through the Next-Token Distributions They Can Produce",
author = "Wang, Haojin and
Zhu, Zining and
Shi, Freda",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingest-emnlp/2025.emnlp-main.1057/",
pages = "20915--20928",
ISBN = "979-8-89176-332-6",
abstract = "Autoregressive neural language models (LMs) generate a probability distribution over tokens at each time step given a prompt. In this work, we attempt to systematically understand the probability distributions that LMs can produce, showing that some distributions are significantly harder to elicit than others. Specifically, for any target next-token distribution over the vocabulary, we attempt to find a prompt that induces the LM to output a distribution as close as possible to the target, using either soft or hard gradient-based prompt tuning. We find that (1) in general, distributions with very low or very high entropy are easier to approximate than those with moderate entropy; (2) among distributions with the same entropy, those containing ``outlier tokens'' are easier to approximate; (3) target distributions generated by LMs {--} even LMs with different tokenizers {--} are easier to approximate than randomly chosen targets. These results offer insights into the expressiveness of LMs and the challenges of using them as probability distribution proposers."
}
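A minimal sketch of the soft gradient-based prompt tuning the abstract describes, for orientation only: it assumes a Hugging Face causal LM (gpt2 here), a randomly drawn target distribution, and KL divergence as the closeness measure. The model choice, prompt length, optimizer settings, and loss are illustrative assumptions, not details taken from the paper.

```python
import torch
import torch.nn.functional as F
from transformers import AutoModelForCausalLM, AutoTokenizer

# Illustrative choices (not from the paper): model, prompt length, lr, steps.
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
model.eval()
for p in model.parameters():
    p.requires_grad_(False)          # only the soft prompt is trained

vocab_size = model.config.vocab_size
embed = model.get_input_embeddings()

# Target next-token distribution to approximate (here: a random target).
target = torch.softmax(torch.randn(vocab_size), dim=-1)

# Soft prompt: a short sequence of trainable embedding vectors,
# initialized from randomly chosen token embeddings.
prompt_len = 10
soft_prompt = torch.nn.Parameter(
    embed.weight[torch.randint(0, vocab_size, (prompt_len,))].clone()
)
optimizer = torch.optim.Adam([soft_prompt], lr=1e-2)

for step in range(500):
    optimizer.zero_grad()
    # Feed the soft prompt directly as input embeddings.
    outputs = model(inputs_embeds=soft_prompt.unsqueeze(0))
    log_probs = F.log_softmax(outputs.logits[0, -1], dim=-1)  # next-token dist
    # KL(target || model) as one plausible distance to minimize.
    loss = F.kl_div(log_probs, target, reduction="sum")
    loss.backward()
    optimizer.step()
```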