% SemEval-2026 Task 5 system description paper (AI4PC, Howard University).
% NOTE(review): the p-value relation in the abstract was lost in extraction
% ("p 10-102", "p 10-65"); restored here as "p approx 10^{-102}" and
% "p approx 10^{-65}" -- confirm the relation symbol against the paper.
% NOTE(review): url was a temporary Anthology preview-branch link; replaced
% with the canonical Anthology URL derived from the same ID -- confirm once
% the volume is published.
@inproceedings{asare-aryal-2026-ai4pc,
  title     = {{AI4PC}-{Howard} {University} at {SemEval}-2026 Task 5: Calibrated Hybrid Ensembling and Retrieval-Augmented {LLM} Reasoning for Narrative Word-Sense Plausibility},
  author    = {Asare, Kwaku and
               Aryal, Saurav},
  editor    = {Kochmar, Ekaterina and
               Ghosh, Debanjan and
               North, Kai and
               Komachi, Mamoru},
  booktitle = {Proceedings of the 20th {International} {Workshop} on {Semantic} {Evaluation} (2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.semeval-1.326/},
  pages     = {2592--2596},
  isbn      = {979-8-89176-414-9},
  abstract  = {We present two complementary approaches for rating word-sense plausibility in SemEval-2026 Task 5 (literary homonyms in five-sentence stories). Approach 1 is a retrieve-then-generate pipeline using an open-weight Llama 3.1 70B Instruct model with structured reasoning and a self-correction pass. Approach 2 is a hybrid ensemble that combines API-based LLM prompting with transformer representations and a learned calibration layer trained on the development set. On the development set, Approach 2 achieves Spearman {\ensuremath{\rho}} = 0.7393 ({\ensuremath{p \approx 10^{-102}}}) with accuracy 0.8010 (471/588). Approach 1 achieves {\ensuremath{\rho}} = 0.5187 ({\ensuremath{p \approx 10^{-65}}}) with accuracy 0.6032 (561/930). We emphasize that Approach 1 does not exceed RoBERTa-base in accuracy (0.6032 vs. 0.6410), but provides stronger rank correlation.},
}
Markdown (Informal)
[AI4PC-Howard University at SemEval-2026 Task 5: Calibrated Hybrid Ensembling and Retrieval-Augmented LLM Reasoning for Narrative Word-Sense Plausibility](https://preview.aclanthology.org/ingest-acl-workshops/2026.semeval-1.326/) (Asare & Aryal, SemEval 2026)
ACL