@inproceedings{tao-etal-2022-compression,
  title     = {Compression of Generative Pre-trained Language Models via Quantization},
  author    = {Tao, Chaofan and Hou, Lu and Zhang, Wei and Shang, Lifeng and Jiang, Xin and Liu, Qun and Luo, Ping and Wong, Ngai},
  editor    = {Muresan, Smaranda and Nakov, Preslav and Villavicencio, Aline},
  booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = may,
  year      = {2022},
  address   = {Dublin, Ireland},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.acl-long.331/},
  doi       = {10.18653/v1/2022.acl-long.331},
  pages     = {4821--4836},
}