% AudioCaps paper (Kim et al.), NAACL-HLT 2019 long/short papers volume.
% The url is the canonical ACL Anthology permalink for ID N19-1011 (same ID as in the DOI).
@inproceedings{kim-etal-2019-audiocaps,
  author    = {Kim, Chris Dongjoo and Kim, Byeongchang and Lee, Hyunmin and Kim, Gunhee},
  title     = {{AudioCaps}: Generating Captions for Audios in The Wild},
  editor    = {Burstein, Jill and Doran, Christy and Solorio, Thamar},
  booktitle = {Proceedings of the 2019 Conference of the North {American} Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)},
  month     = jun,
  year      = {2019},
  address   = {Minneapolis, Minnesota},
  publisher = {Association for Computational Linguistics},
  pages     = {119--132},
  doi       = {10.18653/v1/N19-1011},
  url       = {https://aclanthology.org/N19-1011/},
}