% ENG-DRB (Zhang et al., 2025): conference paper entry exported from the ACL Anthology.
% The url field uses the permanent Anthology address built from the paper ID
% 2025.findings-ijcnlp.81 instead of the temporary ingestion-preview host.
% NOTE(review): confirm the permanent address resolves once the volume is published.
@inproceedings{zhang-etal-2025-eng,
  title     = {{ENG}-{DRB}: {PDTB}-style Discourse Relation Bank on Engineering Tutorial Video Scripts},
  author    = {Zhang, Cheng and
               Kakarla, Rajasekhar and
               Wei, Kangda and
               Huang, Ruihong},
  editor    = {Inui, Kentaro and
               Sakti, Sakriani and
               Wang, Haofen and
               Wong, Derek F. and
               Bhattacharyya, Pushpak and
               Banerjee, Biplab and
               Ekbal, Asif and
               Chakraborty, Tanmoy and
               Singh, Dhirendra Pratap},
  booktitle = {Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics},
  month     = dec,
  year      = {2025},
  address   = {Mumbai, India},
  publisher = {The Asian Federation of Natural Language Processing and The Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-ijcnlp.81/},
  pages     = {1318--1330},
  isbn      = {979-8-89176-303-6},
  abstract  = {Discourse relation parsing plays a crucial role in uncovering the logical structure of text, yet existing corpora focus almost exclusively on general-domain genres, leaving specialized fields like engineering under-resourced. We introduce ENG{-}DRB, the first PDTB{-}style discourse relation corpus derived from transcripts of hands{-}on engineering tutorial videos. ENG{-}DRB comprises 11 tutorials spanning civil, mechanical, and electrical/electronics engineering (155 minutes total) with 1,215 annotated relations. Compared to general{-}domain benchmarks, this dataset features a high proportion of explicit senses, dense causal and temporal relations, and frequent overlapping and embedded senses. Our benchmarking experiments underscore the dataset{'}s difficulty. A top parser (HITS) detects segment boundaries well (98.6{\%} F1), but its relation classification is more than 11 F1 percentages lower than on the standard PDTB. In addition, state{-}of{-}the{-}art LLMs (OpenAI o4{-}mini, Claude 3.7, LLaMA{-}3.1) achieve at best 41{\%} F1 on explicit relations and less than 9{\%} F1 on implicit relations, revealing systematic errors in temporal and causal sense detection. The dataset can be accessed at: https://doi.org/10.57967/hf/6895. Code to reproduce our results is available at: https://github.com/chengzhangedu/ENG-DRB.},
}
Markdown (Informal)
[ENG-DRB: PDTB-style Discourse Relation Bank on Engineering Tutorial Video Scripts](https://aclanthology.org/2025.findings-ijcnlp.81/) (Zhang et al., Findings 2025)
ACL
- Cheng Zhang, Rajasekhar Kakarla, Kangda Wei, and Ruihong Huang. 2025. ENG-DRB: PDTB-style Discourse Relation Bank on Engineering Tutorial Video Scripts. In Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics, pages 1318–1330, Mumbai, India. The Asian Federation of Natural Language Processing and The Association for Computational Linguistics.