@inproceedings{bast-etal-2023-fast,
  title     = {Fast Whitespace Correction with Encoder-Only {Transformers}},
  author    = {Bast, Hannah and
               Hertel, Matthias and
               Walter, Sebastian},
  editor    = {Bollegala, Danushka and
               Huang, Ruihong and
               Ritter, Alan},
  booktitle = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)},
  month     = jul,
  year      = {2023},
  address   = {Toronto, Canada},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.acl-demo.37/},
  doi       = {10.18653/v1/2023.acl-demo.37},
  pages     = {389--399},
  abstract  = {The goal of whitespace correction is to fix space errors in arbitrary given text. For example, given the text {\textquotedblleft}whi te space correctio nwithTransf or mers{\textquotedblright}, produce {\textquotedblleft}whitespace correction with Transformers{\textquotedblright}. We compare two Transformer-based models, a character-level encoder-decoder model and a byte-level encoder-only model. We find that the encoder-only model is both faster and achieves higher quality. We provide an easy-to-use tool that is over 900 times faster than the previous best tool, with the same high quality. Our tool repairs text at a rate of over 200 kB/s on GPU, with a sequence-averaged F1-score ranging from 87.5{\%} for hard-to-correct text up to 99{\%} for text without any spaces.},
}
Markdown (Informal)
[Fast Whitespace Correction with Encoder-Only Transformers](https://aclanthology.org/2023.acl-demo.37/) (Bast et al., ACL 2023)
ACL
- Hannah Bast, Matthias Hertel, and Sebastian Walter. 2023. Fast Whitespace Correction with Encoder-Only Transformers. In Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations), pages 389–399, Toronto, Canada. Association for Computational Linguistics.