% TACL journal article (volume 14, 2026). The url field uses the canonical
% ACL Anthology address for ID 2026.tacl-1.12 rather than an ingestion
% preview branch; the DOI is stored bare, without a resolver prefix.
@article{hsu-etal-2026-large,
  author    = {Hsu, Ting-Yao and
               Hsu, Yi-Li and
               Rohatgi, Shaurya and
               Huang, Chieh-Yang and
               Ng, Ho Yin Sam and
               Rossi, Ryan and
               Kim, Sungchul and
               Yu, Tong and
               Ku, Lun-Wei and
               Giles, Clyde Lee and
               Huang, Ting-Hao},
  title     = {Do Large Multimodal Models Solve Caption Generation for Scientific Figures? Lessons Learned from {SciCap} Challenge 2023},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {14},
  pages     = {233--252},
  year      = {2026},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  doi       = {10.1162/tacl.a.653},
  url       = {https://aclanthology.org/2026.tacl-1.12/},
}