% RAGViz system-demonstration paper, EMNLP 2024 (ACL Anthology ID 2024.emnlp-demo.33).
% The url field uses the canonical Anthology host rather than a preview build.
@inproceedings{wang-etal-2024-ragviz,
  title     = {{RAGViz}: Diagnose and Visualize Retrieval-Augmented Generation},
  author    = {Wang, Tevin and
               He, Jingyuan and
               Xiong, Chenyan},
  editor    = {Hernandez Farias, Delia Irazu and
               Hope, Tom and
               Li, Manling},
  booktitle = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: System Demonstrations},
  month     = nov,
  year      = {2024},
  address   = {Miami, Florida, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.emnlp-demo.33/},
  doi       = {10.18653/v1/2024.emnlp-demo.33},
  pages     = {320--327},
  abstract  = {Retrieval-augmented generation (RAG) combines knowledge from domain-specific sources into large language models to ground answer generation. Current RAG systems lack customizable visibility on the context documents and the model{'}s attentiveness towards such documents. We propose RAGViz, a RAG diagnosis tool that visualizes the attentiveness of the generated tokens in retrieved documents. With a built-in user interface, retrieval index, and Large Language Model (LLM) backbone, RAGViz provides two main functionalities: (1) token and document-level attention visualization, and (2) generation comparison upon context document addition and removal. As an open-source toolkit, RAGViz can be easily hosted with a custom embedding model and HuggingFace-supported LLM backbone. Using a hybrid ANN (Approximate Nearest Neighbor) index, memory-efficient LLM inference tool, and custom context snippet method, RAGViz operates efficiently with a median query time of about 5 seconds on a moderate GPU node. Our code is available at https://github.com/cxcscmu/RAGViz. A demo video of RAGViz can be found at https://youtu.be/cTAbuTu6ur4.},
}
Markdown (Informal)
[RAGViz: Diagnose and Visualize Retrieval-Augmented Generation](https://aclanthology.org/2024.emnlp-demo.33/) (Wang et al., EMNLP 2024)
ACL
- Tevin Wang, Jingyuan He, and Chenyan Xiong. 2024. RAGViz: Diagnose and Visualize Retrieval-Augmented Generation. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, pages 320–327, Miami, Florida, USA. Association for Computational Linguistics.