@inproceedings{lee-etal-2024-hierarchical,
  title     = {Hierarchical Graph Convolutional Network Approach for Detecting Low-Quality Documents},
  author    = {Lee, Jaeyoung and
               Jang, Joonwon and
               Kim, Misuk},
  editor    = {Calzolari, Nicoletta and
               Kan, Min-Yen and
               Hoste, Veronique and
               Lenci, Alessandro and
               Sakti, Sakriani and
               Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation ({LREC-COLING} 2024)},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  url       = {https://aclanthology.org/2024.lrec-main.710/},
  pages     = {8108--8121},
  abstract  = {Consistency within a document is a crucial feature indicative of its quality. Recently, within the vast amount of information produced across various media, there exists a significant number of low-quality documents that either lack internal consistency or contain content utterly unrelated to their headlines. Such low-quality documents induce fatigue in readers and undermine the credibility of the media source that provided them. Consequently, research to automatically detect these low-quality documents based on natural language processing is imperative. In this study, we introduce a hierarchical graph convolutional network (HGCN) that can detect internal inconsistencies within a document and incongruences between the title and body. Moreover, we constructed the Inconsistency Dataset, leveraging published news data and its meta-data, to train our model to detect document inconsistencies. Experimental results demonstrated that the HGCN achieved superior performance with an accuracy of 91.20{\%} on our constructed Inconsistency Dataset, outperforming other comparative models. Additionally, on the publicly available incongruent-related dataset, the proposed methodology demonstrated a performance of 92.00{\%}, validating its general applicability. Finally, an ablation study further confirmed the significant impact of meta-data utilization on performance enhancement. We anticipate that our model can be universally applied to detect and filter low-quality documents in the real world.},
}
Markdown (Informal)
[Hierarchical Graph Convolutional Network Approach for Detecting Low-Quality Documents](https://aclanthology.org/2024.lrec-main.710/) (Lee et al., LREC-COLING 2024)
ACL