% GesNavi dataset paper, EACL 2024 Student Research Workshop (ACL Anthology ID 2024.eacl-srw.23).
% The url field is the canonical Anthology permalink, not a temporary preview/staging build.
@inproceedings{jain-etal-2024-gesnavi,
  title     = {{GesNavi}: Gesture-guided Outdoor Vision-and-Language Navigation},
  author    = {Jain, Aman and
               Misu, Teruhisa and
               Yamada, Kentaro and
               Yanaka, Hitomi},
  editor    = {Falk, Neele and
               Papi, Sara and
               Zhang, Mike},
  booktitle = {Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics: Student Research Workshop},
  month     = mar,
  year      = {2024},
  address   = {St. Julian{'}s, Malta},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.eacl-srw.23/},
  pages     = {290--295},
  abstract  = {Vision-and-Language Navigation (VLN) task involves navigating mobility using linguistic commands and has application in developing interfaces for autonomous mobility. In reality, natural human communication also encompasses non-verbal cues like hand gestures and gaze. These gesture-guided instructions have been explored in Human-Robot Interaction systems for effective interaction, particularly in object-referring expressions. However, a notable gap exists in tackling gesture-based demonstrative expressions in outdoor VLN task. To address this, we introduce a novel dataset for gesture-guided outdoor VLN instructions with demonstrative expressions, designed with a focus on complex instructions requiring multi-hop reasoning between the multiple input modalities. In addition, our work also includes a comprehensive analysis of the collected data and a comparative evaluation against the existing datasets.},
}
Markdown (Informal)
[GesNavi: Gesture-guided Outdoor Vision-and-Language Navigation](https://aclanthology.org/2024.eacl-srw.23/) (Jain et al., EACL 2024)
ACL
- Aman Jain, Teruhisa Misu, Kentaro Yamada, and Hitomi Yanaka. 2024. GesNavi: Gesture-guided Outdoor Vision-and-Language Navigation. In Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics: Student Research Workshop, pages 290–295, St. Julian’s, Malta. Association for Computational Linguistics.