% Workshop paper, VarDial 2019 (ACL Anthology ID W19-1417).
% The url field holds the canonical Anthology address, not a preview-build
% address, because preview builds are staging locations.
@inproceedings{hu-etal-2019-ensemble,
    title     = {Ensemble Methods to Distinguish Mainland and {Taiwan} {Chinese}},
    author    = {Hu, Hai and
                 Li, Wen and
                 Zhou, He and
                 Tian, Zuoyu and
                 Zhang, Yiwen and
                 Zou, Liang},
    editor    = {Zampieri, Marcos and
                 Nakov, Preslav and
                 Malmasi, Shervin and
                 Ljube{\v{s}}i{\'c}, Nikola and
                 Tiedemann, J{\"o}rg and
                 Ali, Ahmed},
    booktitle = {Proceedings of the Sixth Workshop on {NLP} for Similar Languages, Varieties and Dialects},
    month     = jun,
    year      = {2019},
    address   = {Ann Arbor, Michigan},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/W19-1417/},
    doi       = {10.18653/v1/W19-1417},
    pages     = {165--171},
    abstract  = {This paper describes the IUCL system at VarDial 2019 evaluation campaign for the task of discriminating between Mainland and Taiwan variation of mandarin Chinese. We first build several base classifiers, including a Naive Bayes classifier with word n-gram as features, SVMs with both character and syntactic features, and neural networks with pre-trained character/word embeddings. Then we adopt ensemble methods to combine output from base classifiers to make final predictions. Our ensemble models achieve the highest F1 score (0.893) in simplified Chinese track and the second highest (0.901) in traditional Chinese track. Our results demonstrate the effectiveness and robustness of the ensemble methods.},
}
Markdown (Informal)
[Ensemble Methods to Distinguish Mainland and Taiwan Chinese](https://aclanthology.org/W19-1417/) (Hu et al., VarDial 2019)
ACL
- Hai Hu, Wen Li, He Zhou, Zuoyu Tian, Yiwen Zhang, and Liang Zou. 2019. Ensemble Methods to Distinguish Mainland and Taiwan Chinese. In Proceedings of the Sixth Workshop on NLP for Similar Languages, Varieties and Dialects, pages 165–171, Ann Arbor, Michigan. Association for Computational Linguistics.