% Xie et al., EMNLP 2018: conference paper introducing the CLOTH cloze-test dataset.
% The month field joins the predefined oct/nov macros into a range with "--".
% The url field holds the canonical ACL Anthology address for paper D18-1257.
@inproceedings{xie-etal-2018-large,
  author    = {Xie, Qizhe and
               Lai, Guokun and
               Dai, Zihang and
               Hovy, Eduard},
  title     = {Large-scale Cloze Test Dataset Created by Teachers},
  editor    = {Riloff, Ellen and
               Chiang, David and
               Hockenmaier, Julia and
               Tsujii, Jun{'}ichi},
  booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},
  year      = {2018},
  month     = oct # "--" # nov,
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  pages     = {2344--2356},
  doi       = {10.18653/v1/D18-1257},
  url       = {https://aclanthology.org/D18-1257/},
  abstract  = {Cloze tests are widely adopted in language exams to evaluate students' language proficiency. In this paper, we propose the first large-scale human-created cloze test dataset CLOTH, containing questions used in middle-school and high-school language exams. With missing blanks carefully created by teachers and candidate choices purposely designed to be nuanced, CLOTH requires a deeper language understanding and a wider attention span than previously automatically-generated cloze datasets. We test the performance of dedicatedly designed baseline models including a language model trained on the One Billion Word Corpus and show humans outperform them by a significant margin. We investigate the source of the performance gap, trace model deficiencies to some distinct properties of CLOTH, and identify the limited ability of comprehending the long-term context to be the key bottleneck.},
}
Markdown (Informal)
[Large-scale Cloze Test Dataset Created by Teachers](https://aclanthology.org/D18-1257/) (Xie et al., EMNLP 2018)
ACL
- Qizhe Xie, Guokun Lai, Zihang Dai, and Eduard Hovy. 2018. Large-scale Cloze Test Dataset Created by Teachers. In Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, pages 2344–2356, Brussels, Belgium. Association for Computational Linguistics.