@inproceedings{herron-2024-evaluation,
    title     = "An evaluation of current benchmarking strategies for {F}rench biomedical language models",
    author    = "Herron, Felix",
    editor    = "Balaguer, Mathieu and
      Bendahman, Nihed and
      Ho-dac, Lydia-Mai and
      Mauclair, Julie and
      G Moreno, Jose and
      Pinquier, Julien",
    booktitle = "Actes de la 26{\`e}me Rencontre des {\'E}tudiants Chercheurs en Informatique pour le Traitement Automatique des Langues",
    month     = jul,
    year      = "2024",
    address   = "Toulouse, France",
    publisher = "ATALA and AFPC",
    url       = "https://aclanthology.org/2024.jeptalnrecital-recital.1",
    pages     = "1--16",
    abstract  = "We describe the current state of benchmarking for French language biomedical natural language processing (NLP). We note two important criteria in biomedical benchmarking: first, that a biomedical benchmark clearly simulate a specific use cases, in order to offer a useful evaluation of a biomedical model{'}s real life applicability. Second: that a biomedical benchmark be created in collaboration with biomedical professionals. We note that many biomedical benchmarks, particularly in French, do not adhere to these criteria; however, we highlight other biomedical benchmarks which adhere better to those criteria. Furthermore, we evaluate some of the most common French biomedical benchmarks on an array of models and empirically support the necessity of domain-specific and language-specific pre-training for natural language understanding (NLU) tasks. We show that some popular French biomedical language models perform poorly and/or inconsistently on important biomedical tasks. Finally, we advocate for an increase in publicly available, clinically targeted French biomedical NLU benchmarks.",
}
Informal Markdown citation (copied from the ACL Anthology entry page):
[An evaluation of current benchmarking strategies for French biomedical language models](https://aclanthology.org/2024.jeptalnrecital-recital.1) (Herron, JEP/TALN/RECITAL 2024), published by the ACL.