% Workshop paper on stop word lists in open-source NLP packages (ACL Anthology W18-2502).
@inproceedings{nothman-etal-2018-stop,
  author    = {Nothman, Joel and
               Qin, Hanmin and
               Yurchak, Roman},
  title     = {Stop Word Lists in Free Open-source Software Packages},
  booktitle = {Proceedings of Workshop for {NLP} Open Source Software ({NLP}-{OSS})},
  publisher = {Association for Computational Linguistics},
  address   = {Melbourne, Australia},
  month     = jul,
  year      = {2018},
  pages     = {7--12},
  doi       = {10.18653/v1/W18-2502},
  url       = {https://aclanthology.org/W18-2502},
  abstract  = {Open-source software packages for language processing often include stop word lists. Users may apply them without awareness of their surprising omissions (e.g. {``}hasn{'}t{''} but not {``}hadn{'}t{''}) and inclusions ({``}computer{''}), or their incompatibility with a particular tokenizer. Motivated by issues raised about the Scikit-learn stop list, we investigate variation among and consistency within 52 popular English-language stop lists, and propose strategies for mitigating these issues.},
}
Markdown (Informal)
[Stop Word Lists in Free Open-source Software Packages](https://aclanthology.org/W18-2502) (Nothman et al., NLPOSS 2018)
ACL