@inproceedings{tai-van-2025-llmsr,
title = "{LLMSR}@{XLLM}25: Integrating Reasoning Prompt Strategies with Structural Prompt Formats for Enhanced Logical Inference",
author = "Tai, Le and
Van, Thin",
editor = "Fei, Hao and
Tu, Kewei and
Zhang, Yuhui and
Hu, Xiang and
Han, Wenjuan and
Jia, Zixia and
Zheng, Zilong and
Cao, Yixin and
Zhang, Meishan and
Lu, Wei and
Siddharth, N. and
{\O}vrelid, Lilja and
Xue, Nianwen and
Zhang, Yue",
booktitle = "Proceedings of the 1st Joint Workshop on Large Language Models and Structure Modeling (XLLM 2025)",
month = aug,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/landing_page/2025.xllm-1.27/",
pages = "310--316",
ISBN = "979-8-89176-286-2",
abstract = "This paper illustrates our NBTailee team sys- tem approach in XLLM-ACL 2025 Task-III: LLM for Structural Reasoning (LLM-SR), aim- ing to solve both Task: Question parsing and CoT parsing. The process of extracting state- ments and evidence is similar to Discourse Pars- ing. Correct extraction of statements or evi- dence from the COT is crucial at the outset. Next, the pairwise relationship between a spe- cific statement and its corresponding evidence is assessed (a statement should be followed by its related evidence from the CoT). Both seman- tic and lexical similarity are used to evaluate the accuracy of statements and evidence predic- tions. Finally, once a statement-evidence pair is correctly extracted, it is evaluated to deter- mine whether the evidence can logically deduce the statement. To tackle Question Parsing and CoT Parsing, we implement and investigate var- ious solutions, including (1) applying different structural prompt formats like JSON, Mark- down, or XML. (2) utilising various prompt techniques: Few-shot, Chain of thought, and Multi-hop prompting. (3) Taking advantage of Natural Language Inference (NLI) model for the Statement Verification step. Our best of- ficial result is a 243.047 mean score for test phases A and B, and finally, we rank 7th on the final leaderboard."
}