% OLViT paper, LREC-COLING 2024 main proceedings (ACL Anthology ID 2024.lrec-main.1081).
% The url field uses the stable aclanthology.org address rather than a staging/preview host.
@inproceedings{abdessaied-etal-2024-olvit,
    title     = {{OLViT}: Multi-Modal State Tracking via Attention-Based Embeddings for Video-Grounded Dialog},
    author    = {Abdessaied, Adnen and
                 Hochmeister, Manuel and
                 Bulling, Andreas},
    editor    = {Calzolari, Nicoletta and
                 Kan, Min-Yen and
                 Hoste, Veronique and
                 Lenci, Alessandro and
                 Sakti, Sakriani and
                 Xue, Nianwen},
    booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
    month     = may,
    year      = {2024},
    address   = {Torino, Italia},
    publisher = {ELRA and ICCL},
    url       = {https://aclanthology.org/2024.lrec-main.1081/},
    pages     = {12348--12358},
}