% PATeam system paper for SemEval-2025 Task 9 (Food Hazard Detection);
% ACL Anthology ID 2025.semeval-1.249. The url field uses the permanent
% Anthology address for that ID rather than a temporary preview build.
@inproceedings{wan-etal-2025-pateam,
    title     = "{PATeam} at {SemEval}-2025 Task 9: {LLM}-Augmented Fusion for {AI}-Driven Food Safety Hazard Detection",
    author    = "Wan, Xue and
                 Su, Fengping and
                 Sun, Ling and
                 Lin, Yuyang and
                 Chen, Pengfei",
    editor    = "Rosenthal, Sara and
                 Ros{\'a}, Aiala and
                 Ghosh, Debanjan and
                 Zampieri, Marcos",
    booktitle = "Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)",
    month     = jul,
    year      = "2025",
    address   = "Vienna, Austria",
    publisher = "Association for Computational Linguistics",
    url       = "https://aclanthology.org/2025.semeval-1.249/",
    pages     = "1912--1918",
    isbn      = "979-8-89176-273-2",
    abstract  = "This paper introduces the approach we adopted for the SemEval-2025 ``Food Hazard Detection'' task, which aims to predict coarse-grained categories (such as ``product category'' and ``hazard category'') and fine-grained vectors (such as specific products like ``ice cream'' or hazards like ``salmonella'') from noisy, long-tailed text data. To address the issues of dirty data, as well as the severe long-tail distribution of text labels and length in the data, we proposed a pipeline system. This system combines data cleaning, LLM-based enhancement, label resampling, and ensemble learning to tackle data sparsity and label imbalance problems. The two subtasks have strong semantic relatedness. By integrating them into a unified multiturn dialogue framework, we fine-tuned five models using a bagging approach. Ultimately, we achieved good results in both subtasks, ranking 5th (with an F1 score of 80.17{\%} for ST1 and 52.66{\%} for ST2)."
}
Markdown (Informal)
[PATeam at SemEval-2025 Task 9: LLM-Augmented Fusion for AI-Driven Food Safety Hazard Detection](https://aclanthology.org/2025.semeval-1.249/) (Wan et al., SemEval 2025)
ACL