@inproceedings{mandic-etal-2025-distribution,
title = "From In-Distribution to Out-of-Distribution: Joint Loss for Improving Generalization in Software Mention and Relation Extraction",
author = "Mandic, Stasa and
Niess, Georg and
Kern, Roman",
editor = "Ghosal, Tirthankar and
Mayr, Philipp and
Singh, Amanpreet and
Naik, Aakanksha and
Rehm, Georg and
Freitag, Dayne and
Li, Dan and
Schimmler, Sonja and
De Waard, Anita",
booktitle = "Proceedings of the Fifth Workshop on Scholarly Document Processing (SDP 2025)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/landing_page/2025.sdp-1.14/",
doi = "10.18653/v1/2025.sdp-1.14",
pages = "146--153",
ISBN = "979-8-89176-265-7",
abstract = "Identifying software entities and their semantic relations in scientific texts is key for reproducibility and machine-readable knowledge graphs, yet models struggle with domain variability and sparse supervision. We address this by evaluating joint Named Entity Recognition (NER) and Relation Extraction (RE) models on the SOMD 2025 shared task, emphasizing generalization to out-of-domain scholarly texts. We propose a unified training objective that jointly optimizes both tasks using a shared loss function and demonstrates that joint loss formulations can improve out-of-domain robustness compared to disjoint training. Our results reveal significant performance gaps between in- and out-of-domain settings, prompting critical reflections on modeling strategies for software knowledge extraction. Notably, our approach ranked 1st in Phase 2 (out-of-distribution) and 2nd in Phase 1 (in-distribution) in the SOMD 2025 shared task, showing strong generalization and robust performance across domains."
}
Markdown (Informal)
[From In-Distribution to Out-of-Distribution: Joint Loss for Improving Generalization in Software Mention and Relation Extraction](https://preview.aclanthology.org/landing_page/2025.sdp-1.14/) (Mandic et al., sdp 2025)
ACL