% Findings of ACL 2025 paper, ACL Anthology ID 2025.findings-acl.1124.
% The url field uses the canonical Anthology address rather than a preview-branch link.
@inproceedings{gnehm-clematide-2025-improving,
  title     = {Improving Occupational {ISCO} Classification of Multilingual {Swiss} Job Postings with {LLM}-Refined Training Data},
  author    = {Gnehm, Ann-Sophie and
               Clematide, Simon},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2025},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-acl.1124/},
  pages     = {21834--21847},
  isbn      = {979-8-89176-256-5},
  abstract  = {Classifying occupations in multilingual job postings is challenging due to noisy labels, language variation, and domain-specific terminology. We present a method that refines silver-standard ISCO labels by consolidating them with predictions from pre-fine-tuned models, using large language model (LLM) evaluations to resolve discrepancies. The refined labels are used in Multiple Negatives Ranking (MNR) training for SentenceBERT-based classification. This approach substantially improves performance, raising Top-1 accuracy on silver data from 37.2{\%} to 58.3{\%} and reaching up to 80{\%} precision on held-out data{---}an over 30-point gain validated by both GPT and human raters. The model benefits from cross-lingual transfer, with particularly strong gains in French and Italian. These results demonstrate that LLM-guided label refinement can substantially improve multilingual occupation classification in fine-grained taxonomies such as CH-ISCO with 670 classes.},
}
Markdown (Informal)
[Improving Occupational ISCO Classification of Multilingual Swiss Job Postings with LLM-Refined Training Data](https://aclanthology.org/2025.findings-acl.1124/) (Gnehm & Clematide, Findings 2025)
ACL