% EMNLP 2020 main-conference paper, ACL Anthology ID 2020.emnlp-main.640.
% The url field uses the canonical Anthology address, not a preview-branch build.
@inproceedings{khoury-etal-2020-vector,
  author    = {Khoury, Matthew and Dangovski, Rumen and Ou, Longwu and Nakov, Preslav and Shen, Yichen and Jing, Li},
  title     = {Vector-Vector-Matrix Architecture: A Novel Hardware-Aware Framework for Low-Latency Inference in {NLP} Applications},
  editor    = {Webber, Bonnie and Cohn, Trevor and He, Yulan and Liu, Yang},
  booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  month     = nov,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  pages     = {7975--7984},
  doi       = {10.18653/v1/2020.emnlp-main.640},
  url       = {https://aclanthology.org/2020.emnlp-main.640/},
}