% Conference paper (inproceedings): PatentVision, EACL 2026, Volume 5: Industry Track.
% Case-sensitive words in title/booktitle are protected as whole-word brace groups.
% NOTE(review): the url field points at the preview.aclanthology.org "ingest-eacl"
% host, presumably a staging address -- confirm the canonical Anthology URL.
@inproceedings{yang-etal-2026-patentvision,
  title     = {{PatentVision}: A multimodal method for drafting patent applications},
  author    = {Yang, Ruo and
               Mudhiganti, Sai Krishna Reddy and
               Sharma, Manali},
  editor    = {Matusevych, Yevgen and
               Eryi{\u{g}}it, G{\"u}l{\c{s}}en and
               Aletras, Nikolaos},
  booktitle = {Proceedings of the 19th Conference of the {European} Chapter of the {Association} for {Computational} {Linguistics} (Volume 5: Industry Track)},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-eacl/2026.eacl-industry.29/},
  pages     = {397--405},
  isbn      = {979-8-89176-384-5},
  abstract  = {Patent drafting is complex due to its need for detailed technical descriptions, legal compliance, and visual elements. Although Large Vision-Language Models (LVLMs) show promise across various tasks, their application in automating patent writing remains underexplored. In this paper, we present PatentVision, a multimodal framework that integrates textual and visual inputs{---}such as patent claims and drawings{---}to generate complete patent specifications. Built on advanced LVLMs, PatentVision enhances accuracy by combining fine-tuned vision-language models with domain-specific training tailored to patents. Experiments reveal it surpasses text-only methods, producing outputs with greater fidelity and alignment with human-written standards. Its incorporation of visual data allows it to better represent intricate design features and functional connections, leading to richer and more precise results. This study underscores the value of multimodal techniques in patent automation, providing a scalable tool to reduce manual workloads and improve consistency. PatentVision not only advances patent drafting but also lays groundwork for broader use of LVLMs in specialized areas, potentially transforming intellectual property management and innovation processes.},
}
Markdown (Informal)
[PatentVision: A multimodal method for drafting patent applications](https://preview.aclanthology.org/ingest-eacl/2026.eacl-industry.29/) (Yang et al., EACL 2026)
ACL
- Ruo Yang, Sai Krishna Reddy Mudhiganti, and Manali Sharma. 2026. PatentVision: A multimodal method for drafting patent applications. In Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 5: Industry Track), pages 397–405, Rabat, Morocco. Association for Computational Linguistics.