% Findings of NAACL 2024 paper (ACL Anthology ID 2024.findings-naacl.247).
% The url field uses the canonical aclanthology.org address, not a preview/staging build.
@inproceedings{ali-etal-2024-tokenizer,
    title     = {Tokenizer Choice For {LLM} Training: Negligible or Crucial?},
    author    = {Ali, Mehdi and
                 Fromm, Michael and
                 Thellmann, Klaudia and
                 Rutmann, Richard and
                 L{\"u}bbering, Max and
                 Leveling, Johannes and
                 Klug, Katrin and
                 Ebert, Jan and
                 Doll, Niclas and
                 Buschhoff, Jasper and
                 Jain, Charvi and
                 Weber, Alexander and
                 Jurkschat, Lena and
                 Abdelwahab, Hammam and
                 John, Chelsea and
                 Ortiz Suarez, Pedro and
                 Ostendorff, Malte and
                 Weinbach, Samuel and
                 Sifa, Rafet and
                 Kesselheim, Stefan and
                 Flores-Herr, Nicolas},
    editor    = {Duh, Kevin and
                 Gomez, Helena and
                 Bethard, Steven},
    booktitle = {Findings of the Association for Computational Linguistics: {NAACL} 2024},
    month     = jun,
    year      = {2024},
    address   = {Mexico City, Mexico},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2024.findings-naacl.247/},
    doi       = {10.18653/v1/2024.findings-naacl.247},
    pages     = {3907--3924},
}