% Limisiewicz et al., Findings of ACL 2023 (Anthology ID 2023.findings-acl.350).
% The url field uses the canonical aclanthology.org address, not a preview build.
@inproceedings{limisiewicz-etal-2023-tokenization,
  author    = {Limisiewicz, Tomasz and Balhar, Ji{\v{r}}{\'i} and Mare{\v{c}}ek, David},
  title     = {Tokenization Impacts Multilingual Language Modeling: Assessing Vocabulary Allocation and Overlap Across Languages},
  editor    = {Rogers, Anna and Boyd-Graber, Jordan and Okazaki, Naoaki},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2023},
  month     = jul,
  year      = {2023},
  address   = {Toronto, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {5661--5681},
  doi       = {10.18653/v1/2023.findings-acl.350},
  url       = {https://aclanthology.org/2023.findings-acl.350/},
}