% Workshop paper from W-NUT 2020 (ACL Anthology ID 2020.wnut-1.19).
% url is the canonical ACL Anthology landing page; doi is the bare DOI (no resolver prefix).
@inproceedings{grishina-etal-2020-truecasing,
  title     = {Truecasing {German} user-generated conversational text},
  author    = {Grishina, Yulia and
               Gueudre, Thomas and
               Winkler, Ralf},
  editor    = {Xu, Wei and
               Ritter, Alan and
               Baldwin, Tim and
               Rahimi, Afshin},
  booktitle = {Proceedings of the Sixth Workshop on Noisy User-generated Text (W-NUT 2020)},
  month     = nov,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2020.wnut-1.19/},
  doi       = {10.18653/v1/2020.wnut-1.19},
  pages     = {143--148},
  abstract  = {True-casing, the task of restoring proper case to (generally) lower case input, is important in downstream tasks and for screen display. In this paper, we investigate truecasing as an intrinsic task and present several experiments on noisy user queries to a voice-controlled dialog system. In particular, we compare a rule-based, an n-gram language model (LM) and a recurrent neural network (RNN) approaches, evaluating the results on a German Q{\&}A corpus and reporting accuracy for different case categories. We show that while RNNs reach higher accuracy especially on large datasets, character n-gram models with interpolation are still competitive, in particular on mixed-case words where their fall-back mechanisms come into play.},
}
Markdown (Informal)
[Truecasing German user-generated conversational text](https://aclanthology.org/2020.wnut-1.19/) (Grishina et al., WNUT 2020)
ACL