% Kovarik (2000): paper in the Second Chinese Language Processing Workshop, pp. 116--123.
% The url field uses the permanent ACL Anthology address for paper ID W00-1217;
% the doi field holds the bare DOI (no resolver prefix).
@inproceedings{kovarik-2000-large,
  author    = {Kovarik, John J.},
  title     = {How Should a Large Corpus Be Built?-A Comparative Study of Closure in Annotated Newspaper Corpora from Two {Chinese} Sources, Towards Building a Larger Representative Corpus Merged from Representative Sublanguage Collections},
  booktitle = {Second {Chinese} Language Processing Workshop},
  month     = oct,
  year      = {2000},
  address   = {Hong Kong, China},
  publisher = {Association for Computational Linguistics},
  pages     = {116--123},
  doi       = {10.3115/1117769.1117788},
  url       = {https://aclanthology.org/W00-1217/},
}