% ACL Anthology record for a paper in the CustomNLP4U 2026 workshop proceedings.
% The abstract is stored as plain text (no Markdown/LaTeX emphasis markup).
% NOTE(review): the url field points at a preview.aclanthology.org ingest
% branch, which looks like a staging location -- confirm and switch to the
% canonical Anthology URL (and add a doi, if one is assigned) once published.
@inproceedings{meisenbacher-norlander-2026-building,
  title     = {Building a Custom Taxonomy of {AI} Skills and Tasks from the Ground Up with Job Postings},
  author    = {Meisenbacher, Stephen and
               Norlander, Peter},
  editor    = {Mysore, Sheshera and
               Kumar, Sachin and
               Balachandran, Vidhisha and
               Hayati, Shirley Anugrah and
               Brahman, Faeze and
               Moussa, Hanane Nour and
               Salemi, Alireza},
  booktitle = {Proceedings of the Second Workshop on Customizable {NLP}: Progress and Challenges in Customizing {NLP} for a Domain, Application, Group, or Individual ({CustomNLP4U})},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.customnlp4u-1.11/},
  pages     = {117--130},
  isbn      = {979-8-89176-396-8},
  abstract  = {Utilizing LLMs for automated taxonomy construction presents a clear opportunity for the comprehensive, yet efficient mapping of potentially complex domains. When contending with high volumes of rapidly growing corpora, however, it becomes unclear how to best leverage such data for optimal taxonomy construction. Taking the case of systematizing AI skills in the workplace, we use two large-scale job postings corpora to investigate key design decisions for the inclusion (or exclusion) of data points for taxonomy construction. We propose TaxonomyBuilder as a blueprint for our systematic study, with which we evaluate various configurations of custom, data-informed, and hierarchical taxonomies. We demonstrate that less data can provide more clarity: filtering inputs to TaxonomyBuilder provides better domain-specific coverage than offering unfiltered inputs to clustering and LLM-enhanced hierarchical taxonomy labeling tools.},
}

Markdown (Informal)
[Building a Custom Taxonomy of AI Skills and Tasks from the Ground Up with Job Postings](https://preview.aclanthology.org/ingest-acl-workshops/2026.customnlp4u-1.11/) (Meisenbacher & Norlander, CustomNLP4U 2026)
ACL