@comment{
  Conference paper in the LREC-COLING 2024 proceedings (ACL Anthology ID 2024.lrec-main.96).
  The url field uses the canonical ACL Anthology address rather than a temporary
  preview/ingest host, so the link stays valid after the staging branch is removed.
}
@inproceedings{omura-etal-2024-empirical,
  title     = {An Empirical Study of Synthetic Data Generation for Implicit Discourse Relation Recognition},
  author    = {Omura, Kazumasa and
               Cheng, Fei and
               Kurohashi, Sadao},
  editor    = {Calzolari, Nicoletta and
               Kan, Min-Yen and
               Hoste, Veronique and
               Lenci, Alessandro and
               Sakti, Sakriani and
               Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation ({LREC-COLING} 2024)},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  url       = {https://aclanthology.org/2024.lrec-main.96/},
  pages     = {1073--1085},
  abstract  = {Implicit Discourse Relation Recognition (IDRR), which is the task of recognizing the semantic relation between given text spans that do not contain overt clues, is a long-standing and challenging problem. In particular, the paucity of training data for some error-prone discourse relations makes the problem even more challenging. To address this issue, we propose a method of generating synthetic data for IDRR using a large language model. The proposed method is summarized as two folds: extraction of confusing discourse relation pairs based on false negative rate and synthesis of data focused on the confusion. The key points of our proposed method are utilizing a confusion matrix and adopting two-stage prompting to obtain effective synthetic data. According to the proposed method, we generated synthetic data several times larger than training examples for some error-prone discourse relations and incorporated it into training. As a result of experiments, we achieved state-of-the-art macro-F1 performance thanks to the synthetic data without sacrificing micro-F1 performance and demonstrated its positive effects especially on recognizing some infrequent discourse relations.},
}
Markdown (Informal)
[An Empirical Study of Synthetic Data Generation for Implicit Discourse Relation Recognition](https://aclanthology.org/2024.lrec-main.96/) (Omura et al., LREC-COLING 2024)
ACL