% Workshop paper on dense video captioning (CVT5), from the ACL Anthology.
% The url field uses the canonical Anthology address, not a temporary preview build.
@inproceedings{pirhadi-etal-2025-cvt5,
  author    = {Pirhadi, Mohammad Javad and Mirzaei, Motahhare and Eetemadi, Sauleh},
  title     = {{CVT5}: Using Compressed Video Encoder and {UMT5} for Dense Video Captioning},
  editor    = {Zhang, Wei Emma and Dai, Xiang and Elliot, Desmond and Fang, Byron and Sim, Mongyuan and Zhuang, Haojie and Chen, Weitong},
  booktitle = {Proceedings of the First Workshop of Evaluation of Multi-Modal Generation},
  month     = jan,
  year      = {2025},
  address   = {Abu Dhabi, UAE},
  publisher = {Association for Computational Linguistics},
  pages     = {10--23},
  url       = {https://aclanthology.org/2025.evalmg-1.2/},
}