% Workshop paper, WNUT 2025 (ACL Anthology ID 2025.wnut-1.5).
@inproceedings{maarouf-tanguy-2025-automatic,
  title     = {Automatic normalization of noisy technical reports with an {LLM}: What effects on a downstream task?},
  author    = {Maarouf, Mariame and
               Tanguy, Ludovic},
  editor    = {Bak, JinYeong and
               van der Goot, Rob and
               Jang, Hyeju and
               Buaphet, Weerayut and
               Ramponi, Alan and
               Xu, Wei and
               Ritter, Alan},
  booktitle = {Proceedings of the Tenth Workshop on Noisy and User-generated Text},
  month     = may,
  year      = {2025},
  address   = {Albuquerque, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.wnut-1.5/},
  pages     = {38--44},
  isbn      = {979-8-89176-232-9},
  abstract  = {This study explores the automatic normalization of noisy and highly technical anomaly reports by an LLM. Different prompts are tested to instruct the LLM to clean the text without changing the structure, vocabulary or specialized lexicon. The evaluation of this task is made in two steps. First, the Character Error Rate (CER) is calculated to assess the changes made compared to a gold standard on a small sample. Second, an automatic sequence labeling task is performed on the original and on the corrected datasets with a transformer-based classifier. If some configurations of LLM and prompts can reach satisfying CER scores, the sequence labeling task shows that the normalization has a small negative impact on performance.},
}
Markdown (Informal)
[Automatic normalization of noisy technical reports with an LLM: What effects on a downstream task?](https://aclanthology.org/2025.wnut-1.5/) (Maarouf & Tanguy, WNUT 2025)
ACL