% Short paper from the INLG 2024 main proceedings (ACL Anthology ID 2024.inlg-main.36).
% The url field uses the canonical Anthology host rather than a preview/staging build,
% and matches the identifier carried by the doi field.
@inproceedings{sagare-etal-2024-audio-visual,
    title     = {Audio-visual training for improved grounding in video-text {LLMs}},
    author    = {Sagare, Shivprasad Rajendra and
                 S, Hemachandran and
                 Sarabhai, Kinshuk and
                 Ullegaddi, Prashant and
                 Sa, Rajeshkumar},
    editor    = {Mahamood, Saad and
                 Minh, Nguyen Le and
                 Ippolito, Daphne},
    booktitle = {Proceedings of the 17th International Natural Language Generation Conference},
    month     = sep,
    year      = {2024},
    address   = {Tokyo, Japan},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2024.inlg-main.36/},
    doi       = {10.18653/v1/2024.inlg-main.36},
    pages     = {440--445},
}