% Takoyaki system-description paper for SemEval-2026 Task 3 (DimABSA),
% published in the proceedings of the 20th International Workshop on
% Semantic Evaluation.
% Acronyms and proper nouns in title/booktitle are protected with whole-word
% braces so that sentence-casing styles keep their capitalisation.
% NOTE(review): the url field points at a preview.aclanthology.org ingest
% branch, presumably a staging address -- confirm and switch to the permanent
% Anthology URL once it is available.
@inproceedings{yamada-etal-2026-takoyaki,
  title     = {Takoyaki at {SemEval}-2026 Task 3: Ensembling {LLM} Predictions using Demonstration Retrieval for Dimensional Aspect-based Sentiment Analysis},
  author    = {Yamada, Kosuke and
               Takase, Sho and
               Kohita, Ryosuke},
  editor    = {Kochmar, Ekaterina and
               Ghosh, Debanjan and
               North, Kai and
               Komachi, Mamoru},
  booktitle = {Proceedings of the 20th {International} {Workshop} on {Semantic} {Evaluation} (2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.semeval-1.219/},
  pages     = {1707--1723},
  isbn      = {979-8-89176-414-9},
  abstract  = {This paper describes our system for SemEval-2026 Task 3 (DimABSA). We participate in Subtask 2 (DimASTE), which requires extracting triplets of aspect term, opinion term, and valence-arousal scores from review sentences, and Subtask 3 (DimASQP), which additionally requires aspect category classification to form quadruplets. Our proposed system consists of a multi-step pipeline: (1) retrieval-based in-context learning using BM25 to select relevant demonstrations for LLM inference, (2) agreement-based ensemble combining LLM predictions from multiple retrieval variants, and, for a subset of datasets, (3) error-pattern correction refining uncertain predictions using correction rule sets based on training data. Retrieval-based ICL and the agreement-based ensemble show consistent improvements across languages and domains. Error-pattern correction yields further improvement for the Japanese dataset. To further investigate output quality beyond automated evaluation metrics, we conducted human evaluation. The results suggest that LLM-based labeling achieves higher agreement with gold labels than human annotators, and additionally indicate a discrepancy between automated scores and practical output quality.},
}
Markdown (Informal)
[Takoyaki at SemEval-2026 Task 3: Ensembling LLM Predictions using Demonstration Retrieval for Dimensional Aspect-based Sentiment Analysis](https://preview.aclanthology.org/ingest-acl-workshops/2026.semeval-1.219/) (Yamada et al., SemEval 2026)
ACL