% Position paper from Findings of ACL 2024; url is the canonical ACL Anthology record (not a preview/staging deployment).
@inproceedings{nourbakhsh-etal-2024-towards,
  title     = {Towards a new research agenda for multimodal enterprise document understanding: What are we missing?},
  author    = {Nourbakhsh, Armineh and
               Shah, Sameena and
               Rose, Carolyn},
  editor    = {Ku, Lun-Wei and
               Martins, Andre and
               Srikumar, Vivek},
  booktitle = {Findings of the Association for Computational Linguistics: {ACL} 2024},
  month     = aug,
  year      = {2024},
  address   = {Bangkok, Thailand},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.findings-acl.870/},
  doi       = {10.18653/v1/2024.findings-acl.870},
  pages     = {14610--14622},
  abstract  = {The field of multimodal document understanding has produced a suite of models that have achieved stellar performance across several tasks, even coming close to human performance on certain benchmarks. Nevertheless, the application of these models to real-world enterprise datasets remains constrained by a number of limitations. In this position paper, we discuss these limitations in the context of three key aspects of research: dataset curation, model development, and evaluation on downstream tasks. By analyzing 14 datasets and 7 SotA models, we identify major gaps in their utility in the context of a real-world scenario. We demonstrate how each limitation impedes the widespread use of SotA models in enterprise settings, and present a set of research challenges that are motivated by these limitations. Lastly, we propose a research agenda that is aimed at driving the field towards higher impact in enterprise applications.},
}
Markdown (Informal)
[Towards a new research agenda for multimodal enterprise document understanding: What are we missing?](https://aclanthology.org/2024.findings-acl.870/) (Nourbakhsh et al., Findings 2024)
ACL