@inproceedings{papusoi-nisioi-2025-comparison,
title = "A Comparison of Elementary Baselines for {B}aby{LM}",
author = "P{\u{a}}pușoi, Rareș and
Nisioi, Sergiu",
editor = "Charpentier, Lucas and
Choshen, Leshem and
Cotterell, Ryan and
Gul, Mustafa Omer and
Hu, Michael Y. and
      Jumelet, Jaap and
      Linzen, Tal and
      Liu, Jing and
Mueller, Aaron and
Ross, Candace and
Shah, Raj Sanjay and
Warstadt, Alex and
Wilcox, Ethan Gotlieb and
Williams, Adina",
booktitle = "Proceedings of the First BabyLM Workshop",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingest-emnlp/2025.babylm-main.16/",
pages = "218--225",
ISBN = "TODO",
    abstract = "This paper explores multiple simple baselines for the BabyLM challenge, covering random models, elementary frequency-based predictions, n-gram language models, an LSTM with several tokenizers (BPE, Unigram, SuperBPE), and GPT-BERT, the winning architecture from the prior BabyLM edition. The evaluation focuses on the BLiMP and BLiMP-Supplement benchmarks. Our experiments show that Strict-Small can sometimes outperform Strict, that performance can be highly sensitive to tokenization, and that data efficiency matters. A simple word-frequency baseline scored unexpectedly high, which led to identifying an evaluation artifact in the pipeline: a system that returns identical logits for both sentences in a minimal pair can achieve maximal accuracy."
}