% Findings of EMNLP 2024 paper on multi-label classification of long texts.
% The url field holds the canonical ACL Anthology landing page (same ID as the DOI suffix).
@inproceedings{zhang-etal-2024-text-segmentation,
  title     = {From Text Segmentation to Enhanced Representation Learning: A Novel Approach to Multi-Label Classification for Long Texts},
  author    = {Zhang, Wang and
               Wang, Xin and
               Wang, Qian and
               Deng, Tao and
               Wu, Xiaoru},
  editor    = {Al-Onaizan, Yaser and
               Bansal, Mohit and
               Chen, Yun-Nung},
  booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2024},
  month     = nov,
  year      = {2024},
  address   = {Miami, Florida, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.findings-emnlp.402/},
  doi       = {10.18653/v1/2024.findings-emnlp.402},
  pages     = {6864--6873},
  abstract  = {Multi-label text classification (MLTC) is an important task in the field of natural language processing. Most existing models rely on high-quality text representations provided by pre-trained language models (PLMs). They hence face the challenge of input length limitation caused by PLMs, when dealing with long texts. In light of this, we introduce a comprehensive approach to multi-label long text classification. We propose a text segmentation algorithm, which guarantees to produce the optimal segmentation, to address the issue of input length limitation caused by PLMs. We incorporate external knowledge, labels' co-occurrence relations, and attention mechanisms in representation learning to enhance both text and label representations. Our method's effectiveness is validated through extensive experiments on various MLTC datasets, unraveling the intricate correlations between texts and labels.},
}
Markdown (Informal)
[From Text Segmentation to Enhanced Representation Learning: A Novel Approach to Multi-Label Classification for Long Texts](https://aclanthology.org/2024.findings-emnlp.402/) (Zhang et al., Findings 2024)
ACL