@inproceedings{capone-etal-2024-concretegpt,
title = "{C}oncrete{GPT}: A Baby {GPT}-2 Based on Lexical Concreteness and Curriculum Learning",
author = "Capone, Luca and
Bondielli, Alessandro and
Lenci, Alessandro",
editor = "Hu, Michael Y. and
Mueller, Aaron and
Ross, Candace and
Williams, Adina and
Linzen, Tal and
Zhuang, Chengxu and
Choshen, Leshem and
Cotterell, Ryan and
Warstadt, Alex and
Wilcox, Ethan Gotlieb",
booktitle = "The 2nd BabyLM Challenge at the 28th Conference on Computational Natural Language Learning",
month = nov,
year = "2024",
address = "Miami, FL, USA",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2024.conll-babylm.16/",
pages = "189--196",
abstract = "We present a model for the Strict-Small track of the BabyLM Challenge 2024 (Choshen et al. 2024). We introduce a Curriculum Learning approach for training a specialized version of GPT-2 (Radford et al. 2019), that we name ConcreteGPT. We utilize the norms from (Brysbaert et al. 2014) which provide concreteness ratings for 40,000 English lexical items based on human subjects. Using these norms, we assign a concreteness score to each sentence in the training dataset and develop two curriculum strategies that progressively introduce more complex and abstract language patterns in the training data. Compared to the baselines, our best model shows lower performance on zero-shot tasks but demonstrates superior performance in fine-tuning tasks. Notably, our curriculum-trained models exhibit significant improvements over a non-curriculum based training of the same model."
}