% Workshop paper (W-NUT 2022): first empirical study of disfluency detection for Vietnamese.
% Capitalised words that must survive sentence-casing styles are brace-protected as whole words.
% NOTE(review): the url below points at a preview.aclanthology.org deployment (path "fix-sig-urls"),
% which looks like a staging build rather than the permanent address -- confirm and replace with the
% canonical ACL Anthology link for 2022.wnut-1.21 if so.
@inproceedings{dao-etal-2022-disfluency,
  author    = {Dao, Mai Hoang and
               Truong, Thinh Hung and
               Nguyen, Dat Quoc},
  title     = {Disfluency Detection for {Vietnamese}},
  booktitle = {Proceedings of the Eighth Workshop on Noisy User-generated Text ({W-NUT} 2022)},
  month     = oct,
  year      = {2022},
  address   = {Gyeongju, Republic of Korea},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/fix-sig-urls/2022.wnut-1.21/},
  pages     = {194--200},
  abstract  = {In this paper, we present the first empirical study for Vietnamese disfluency detection. To conduct this study, we first create a disfluency detection dataset for Vietnamese, with manual annotations over two disfluency types. We then empirically perform experiments using strong baseline models, and find that: automatic Vietnamese word segmentation improves the disfluency detection performances of the baselines, and the highest performance results are obtained by fine-tuning pre-trained language models in which the monolingual model PhoBERT for Vietnamese does better than the multilingual model XLM-R.},
}
Markdown (Informal)
[Disfluency Detection for Vietnamese](https://preview.aclanthology.org/fix-sig-urls/2022.wnut-1.21/) (Dao et al., WNUT 2022)
ACL
- Mai Hoang Dao, Thinh Hung Truong, and Dat Quoc Nguyen. 2022. Disfluency Detection for Vietnamese. In Proceedings of the Eighth Workshop on Noisy User-generated Text (W-NUT 2022), pages 194–200, Gyeongju, Republic of Korea. Association for Computational Linguistics.