@comment{
  Workshop paper record (LAW XX, 2026), published by the Association for
  Computational Linguistics.
  NOTE(review): the url field below points at a preview.aclanthology.org
  ingest build rather than the main aclanthology.org site -- presumably a
  staging link; confirm and switch to the canonical URL once the volume is
  published.
}
@inproceedings{basmov-etal-2026-prompts,
  title     = {Prompts in the Wild: A Large Analyzed Collection of Transactional Prompts in Code},
  author    = {Basmov, Victoria and
               Goldberg, Yoav and
               Tsarfaty, Reut},
  editor    = {Liu, Yang Janet and
               Gessler, Luke},
  booktitle = {Proceedings of the 20th Linguistic Annotation Workshop ({LAW} {XX})},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.law-main.19/},
  pages     = {257--308},
  isbn      = {979-8-89176-404-0},
  abstract  = {The behavior of contemporary generative Large Language Models (LLMs) is directly shaped by prompts, unstructured texts that describe the desired output and model behavior. In this paper we argue that prompts are linguistic objects that merit investigation in their own right. To this end, we collect 57.5K unique samples of prompts from GitHub. Specifically, we focus on transactional prompts: reproducible natural language instructions that are integrated into software. To enable the empirical, quantitative study of prompts, we introduce a structured ontology, capturing the properties of prompts as well as their formal and semantic components. Based on this ontology, we transform prompts from unstructured raw texts into richly structured linguistic objects. Analysis of these structured data reveals significant diversity of usage patterns across languages, domains, tasks, and modalities, in a typical Zipf-like distribution where some clearly prevail and others, more diverse, appear in the long tail. To validate the reliability of the ontology-based annotation of the prompts, we perform a comprehensive error analysis across all fields, providing a detailed assessment of annotation quality. We release the dataset together with a browsing and exploration interface.},
}
Markdown (Informal)
[Prompts in the Wild: A Large Analyzed Collection of Transactional Prompts in Code](https://preview.aclanthology.org/ingest-acl-workshops/2026.law-main.19/) (Basmov et al., LAW 2026)
ACL