% DIMSIM (Li et al., CoNLL 2018). The url field is the canonical ACL Anthology
% landing page for paper K18-1043, not a temporary preview build.
@inproceedings{li-etal-2018-dimsim,
  title     = {{DIMSIM}: An Accurate {Chinese} Phonetic Similarity Algorithm Based on Learned High Dimensional Encoding},
  author    = {Li, Min and
               Danilevsky, Marina and
               Noeman, Sara and
               Li, Yunyao},
  editor    = {Korhonen, Anna and
               Titov, Ivan},
  booktitle = {Proceedings of the 22nd Conference on Computational Natural Language Learning},
  month     = oct,
  year      = {2018},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/K18-1043/},
  doi       = {10.18653/v1/K18-1043},
  pages     = {444--453},
  abstract  = {Phonetic similarity algorithms identify words and phrases with similar pronunciation which are used in many natural language processing tasks. However, existing approaches are designed mainly for Indo-European languages and fail to capture the unique properties of Chinese pronunciation. In this paper, we propose a high dimensional encoded phonetic similarity algorithm for Chinese, DIMSIM. The encodings are learned from annotated data to separately map initial and final phonemes into n-dimensional coordinates. Pinyin phonetic similarities are then calculated by aggregating the similarities of initial, final and tone. DIMSIM demonstrates a 7.5X improvement on mean reciprocal rank over the state-of-the-art phonetic similarity approaches.},
}
Markdown (Informal)
[DIMSIM: An Accurate Chinese Phonetic Similarity Algorithm Based on Learned High Dimensional Encoding](https://aclanthology.org/K18-1043/) (Li et al., CoNLL 2018)
ACL