% Workshop paper from the Second Workshop on Insights from Negative Results in NLP (2021).
% Braces in the title keep the model names CBOW and Skip-gram capitalised under
% bibliography styles that convert titles to sentence case.
@inproceedings{irsoy-etal-2021-corrected,
  title     = {Corrected {CBOW} Performs as well as {Skip-gram}},
  author    = {{\.I}rsoy, Ozan and
               Benton, Adrian and
               Stratos, Karl},
  editor    = {Sedoc, Jo{\~a}o and
               Rogers, Anna and
               Rumshisky, Anna and
               Tafreshi, Shabnam},
  booktitle = {Proceedings of the Second Workshop on Insights from Negative Results in NLP},
  month     = nov,
  year      = {2021},
  address   = {Online and Punta Cana, Dominican Republic},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.insights-1.1},
  doi       = {10.18653/v1/2021.insights-1.1},
  pages     = {1--8},
  abstract  = {Mikolov et al. (2013a) observed that continuous bag-of-words (CBOW) word embeddings tend to underperform Skip-gram (SG) embeddings, and this finding has been reported in subsequent works. We find that these observations are driven not by fundamental differences in their training objectives, but more likely on faulty negative sampling CBOW implementations in popular libraries such as the official implementation, word2vec.c, and Gensim. We show that after correcting a bug in the CBOW gradient update, one can learn CBOW word embeddings that are fully competitive with SG on various intrinsic and extrinsic tasks, while being many times faster to train.},
}
Markdown (Informal)
[Corrected CBOW Performs as well as Skip-gram](https://aclanthology.org/2021.insights-1.1) (İrsoy et al., Insights 2021)
ACL
- Ozan İrsoy, Adrian Benton, and Karl Stratos. 2021. Corrected CBOW Performs as well as Skip-gram. In Proceedings of the Second Workshop on Insights from Negative Results in NLP, pages 1–8, Online and Punta Cana, Dominican Republic. Association for Computational Linguistics.