% WSSANLP 2016 workshop paper; ACL Anthology ID W16-3712.
@inproceedings{nishioka-akasegawa-2016-development,
  title     = {The development of a web corpus of {Hindi} language and corpus-based comparative studies to {Japanese}},
  author    = {Nishioka, Miki and
               Akasegawa, Shiro},
  editor    = {Wu, Dekai and
               Bhattacharyya, Pushpak},
  booktitle = {Proceedings of the 6th Workshop on South and Southeast {Asian} Natural Language Processing ({WSSANLP}2016)},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  url       = {https://preview.aclanthology.org/add-emnlp-2024-awards/W16-3712/},
  pages     = {114--123},
  abstract  = {In this paper, we discuss our creation of a web corpus of spoken Hindi (COSH), one of the Indo-Aryan languages spoken mainly in the Indian subcontinent. We also point out notable problems we've encountered in the web corpus and the special concordancer. After observing the kind of technical problems we encountered, especially regarding annotation tagged by Shiva Reddy's tagger, we argue how they can be solved when using COSH for linguistic studies. Finally, we mention the kinds of linguistic research that we non-native speakers of Hindi can do using the corpus, especially in pragmatics and semantics, and from a comparative viewpoint to Japanese.},
}
Markdown (Informal)
[The development of a web corpus of Hindi language and corpus-based comparative studies to Japanese](https://preview.aclanthology.org/add-emnlp-2024-awards/W16-3712/) (Nishioka & Akasegawa, WSSANLP 2016)
ACL