@comment{
  EMNLP 2023 proceedings paper, ACL Anthology ID 2023.emnlp-main.492 (see doi).
  The url field uses the canonical aclanthology.org address, not a preview branch.
  The model name in the title is braced as one unit so styles keep its casing and math intact.
}
@inproceedings{wang-etal-2023-m3seg,
  title     = {{M$^3$Seg}: A Maximum-Minimum Mutual Information Paradigm for Unsupervised Topic Segmentation in {ASR} Transcripts},
  author    = {Wang, Ke and
               Zhao, Xiutian and
               Li, Yanghui and
               Peng, Wei},
  editor    = {Bouamor, Houda and
               Pino, Juan and
               Bali, Kalika},
  booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing},
  month     = dec,
  year      = {2023},
  address   = {Singapore},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.emnlp-main.492/},
  doi       = {10.18653/v1/2023.emnlp-main.492},
  pages     = {7928--7934},
  abstract  = {Topic segmentation aims to detect topic boundaries and split automatic speech recognition transcriptions (e.g., meeting transcripts) into segments that are bounded by thematic meanings. In this work, we propose M$^3$Seg, a novel Maximum-Minimum Mutual information paradigm for linear topic segmentation without using any parallel data. Specifically, by employing sentence representations provided by pre-trained language models, M$^3$Seg first learns a region-based segment encoder based on the maximization of mutual information between the global segment representation and the local contextual sentence representation. Secondly, an edge-based boundary detection module aims to segment the whole by topics based on minimizing the mutual information between different segments. Experiment results on two public datasets demonstrate the effectiveness of M$^3$Seg, which outperform the state-of-the-art methods by a significant (18{\%}{--}37{\%} improvement) margin.},
}
Markdown (Informal)
[M³Seg: A Maximum-Minimum Mutual Information Paradigm for Unsupervised Topic Segmentation in ASR Transcripts](https://aclanthology.org/2023.emnlp-main.492/) (Wang et al., EMNLP 2023)
ACL