% Ali et al., Findings of NAACL 2024 (conference paper, pp. 3907--3924).
% The url field uses the canonical ACL Anthology landing page; the previous
% value pointed at a preview (staging) build of the Anthology site.
@inproceedings{ali-etal-2024-tokenizer,
  author    = {Ali, Mehdi and
               Fromm, Michael and
               Thellmann, Klaudia and
               Rutmann, Richard and
               L{\"u}bbering, Max and
               Leveling, Johannes and
               Klug, Katrin and
               Ebert, Jan and
               Doll, Niclas and
               Buschhoff, Jasper and
               Jain, Charvi and
               Weber, Alexander and
               Jurkschat, Lena and
               Abdelwahab, Hammam and
               John, Chelsea and
               Ortiz Suarez, Pedro and
               Ostendorff, Malte and
               Weinbach, Samuel and
               Sifa, Rafet and
               Kesselheim, Stefan and
               Flores-Herr, Nicolas},
  title     = {Tokenizer Choice For {LLM} Training: Negligible or Crucial?},
  editor    = {Duh, Kevin and
               Gomez, Helena and
               Bethard, Steven},
  booktitle = {Findings of the Association for Computational Linguistics: NAACL 2024},
  month     = jun,
  year      = {2024},
  address   = {Mexico City, Mexico},
  publisher = {Association for Computational Linguistics},
  pages     = {3907--3924},
  doi       = {10.18653/v1/2024.findings-naacl.247},
  url       = {https://aclanthology.org/2024.findings-naacl.247/},
}