% JEMHopQA dataset paper (LREC-COLING 2024), ACL Anthology ID 2024.lrec-main.831.
% The url field uses the canonical Anthology permalink, not a preview-branch build.
@inproceedings{ishii-etal-2024-jemhopqa,
  title     = {{JEMHopQA}: Dataset for {Japanese} Explainable Multi-Hop Question Answering},
  author    = {Ishii, Ai and
               Inoue, Naoya and
               Suzuki, Hisami and
               Sekine, Satoshi},
  editor    = {Calzolari, Nicoletta and
               Kan, Min-Yen and
               Hoste, Veronique and
               Lenci, Alessandro and
               Sakti, Sakriani and
               Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  month     = may,
  year      = 2024,
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  url       = {https://aclanthology.org/2024.lrec-main.831/},
  pages     = {9515--9525},
  abstract  = {We present JEMHopQA, a multi-hop QA dataset for the development of explainable QA systems. The dataset consists not only of question-answer pairs, but also of supporting evidence in the form of derivation triples, which contributes to making the QA task more realistic and difficult. It is created based on Japanese Wikipedia using both crowd-sourced human annotation as well as prompting a large language model (LLM), and contains a diverse set of question, answer and topic categories as compared with similar datasets released previously. We describe the details of how we built the dataset as well as the evaluation of the QA task presented by this dataset using GPT-4, and show that the dataset is sufficiently challenging for the state-of-the-art LLM while showing promise for combining such a model with existing knowledge resources to achieve better performance.},
}
Markdown (Informal)
[JEMHopQA: Dataset for Japanese Explainable Multi-Hop Question Answering](https://aclanthology.org/2024.lrec-main.831/) (Ishii et al., LREC-COLING 2024)
ACL