@inproceedings{aguiar-etal-2024-seme,
title = "{SEME} at {S}em{E}val-2024 Task 2: Comparing Masked and Generative Language Models on Natural Language Inference for Clinical Trials",
author = "Aguiar, Mathilde and
Zweigenbaum, Pierre and
Naderi, Nona",
editor = {Ojha, Atul Kr. and
Do{\u{g}}ru{\"o}z, A. Seza and
Tayyar Madabushi, Harish and
Da San Martino, Giovanni and
Rosenthal, Sara and
Ros{\'a}, Aiala},
booktitle = "Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2024.semeval-1.143/",
doi = "10.18653/v1/2024.semeval-1.143",
pages = "986--996",
abstract = "This paper describes our submission to Task 2 of SemEval-2024: Safe Biomedical Natural Language Inference for Clinical Trials. The Multi-evidence Natural Language Inference for Clinical Trial Data (NLI4CT) consists of a Textual Entailment (TE) task focused on the evaluation of the consistency and faithfulness of Natural Language Inference (NLI) models applied to Clinical Trial Reports (CTR). We test 2 distinct approaches, one based on finetuning and ensembling Masked Language Models and the other based on prompting Large Language Models using templates, in particular, using Chain-Of-Thought and Contrastive Chain-Of-Thought. Prompting Flan-T5-large in a 2-shot setting leads to our best system that achieves 0.57 F1 score, 0.64 Faithfulness, and 0.56 Consistency."
}
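
The abstract above mentions that the best-performing system prompts Flan-T5-large in a 2-shot setting. Below is a minimal, hypothetical sketch (not the authors' code) of what such 2-shot prompting for clinical-trial NLI could look like with the Hugging Face transformers library; the prompt wording, demonstration pairs, and labels are illustrative placeholders only.

    # Hypothetical sketch: 2-shot prompting of Flan-T5-large for clinical-trial NLI.
    # Prompt text, example reports/statements, and labels are illustrative, not from the paper.
    from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

    model_name = "google/flan-t5-large"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

    # Two in-context demonstrations (2-shot), followed by the query instance.
    prompt = (
        "Decide whether the statement is an Entailment or a Contradiction "
        "given the clinical trial report section.\n\n"
        "Report: The primary outcome was overall survival at 24 months.\n"
        "Statement: The trial measured survival after two years.\n"
        "Answer: Entailment\n\n"
        "Report: Patients over 75 years of age were excluded.\n"
        "Statement: The trial enrolled patients of any age.\n"
        "Answer: Contradiction\n\n"
        "Report: Adverse events were reported in 12% of the treatment arm.\n"
        "Statement: No adverse events occurred in the treatment arm.\n"
        "Answer:"
    )

    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=8)
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))  # expected: a label such as "Contradiction"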