% Conference paper from the INLG 2024 proceedings (ACL Anthology ID 2024.inlg-main.36).
% The url field uses the canonical aclanthology.org host; the previous value pointed
% at a temporary preview deployment (branch "fix-sig-urls") that is not a stable link.
@inproceedings{sagare-etal-2024-audio-visual,
  author    = {Sagare, Shivprasad Rajendra and S, Hemachandran and Sarabhai, Kinshuk and Ullegaddi, Prashant and Sa, Rajeshkumar},
  title     = {Audio-visual training for improved grounding in video-text {LLM}s},
  editor    = {Mahamood, Saad and Minh, Nguyen Le and Ippolito, Daphne},
  booktitle = {Proceedings of the 17th International Natural Language Generation Conference},
  month     = sep,
  year      = {2024},
  address   = {Tokyo, Japan},
  publisher = {Association for Computational Linguistics},
  pages     = {440--445},
  url       = {https://aclanthology.org/2024.inlg-main.36/},
}