@inproceedings{gonzalez-etal-2024-numdecoders,
  title     = {{N}um{D}ecoders at {S}em{E}val-2024 Task 7: {F}lan{T}5 and {GPT} enhanced with {C}o{T} for Numerical Reasoning},
  author    = {Gonzalez, Andres and
               Hossain, Md Zobaer and
               Junaed, Jahedul Alam},
  editor    = {Ojha, Atul Kr. and
               Do{\u{g}}ru{\"o}z, A. Seza and
               Tayyar Madabushi, Harish and
               Da San Martino, Giovanni and
               Rosenthal, Sara and
               Ros{\'a}, Aiala},
  booktitle = {Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)},
  month     = jun,
  year      = {2024},
  address   = {Mexico City, Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.semeval-1.183/},
  doi       = {10.18653/v1/2024.semeval-1.183},
  pages     = {1260--1268},
  abstract  = {In this paper we present a Chain-of-Thought enhanced solution for large language models, including flanT5 and GPT 3.5 Turbo, aimed at solving mathematical problems to fill in blanks from news headlines. Our approach builds on a data augmentation strategy that incorporates additional mathematical reasoning observations into the original dataset sourced from another mathematical corpus. Both automatic and manual annotations are applied to explicitly describe the reasoning steps required for models to reach the target answer. We employ an ensemble majority voting method to generate final predictions across our best-performing models. Our analysis reveals that while larger models trained with our enhanced dataset achieve significant gains (91{\%} accuracy, ranking 5th on the NumEval Task 3 leaderboard), smaller models do not experience improvements and may even see a decrease in overall accuracy. We conclude that improving our automatic annotations via crowdsourcing methods can be a worthwhile endeavor to train larger models than the ones from this study to see the most accurate results.},
}
Markdown (Informal)
[NumDecoders at SemEval-2024 Task 7: FlanT5 and GPT enhanced with CoT for Numerical Reasoning](https://aclanthology.org/2024.semeval-1.183/) (Gonzalez et al., SemEval 2024)
ACL