@inproceedings{patil-etal-2025-reliable,
    title         = "Reliable Inline Code Documentation with {LLM}s: Fine-Grained Evaluation of Comment Quality and Coverage",
    author        = "Patil, Rohan and
      Tirodkar, Gaurav and
      Gatfane, Shubham",
    editor        = "Akter, Mousumi and
      Chowdhury, Tahiya and
      Eger, Steffen and
      Leiter, Christoph and
      Opitz, Juri and
      {\c{C}}ano, Erion",
    booktitle     = "Proceedings of the 5th Workshop on Evaluation and Comparison of NLP Systems",
    month         = dec,
    year          = "2025",
    address       = "Mumbai, India",
    publisher     = "Association for Computational Linguistics",
    url           = "https://preview.aclanthology.org/ingest-ijcnlp-aacl/2025.eval4nlp-1.4/",
    pages         = "40--54",
    isbn          = "979-8-89176-305-0",
    internal-note = "NOTE(review): url is an ACL Anthology ingest-preview link; confirm and switch to the canonical https://aclanthology.org/2025.eval4nlp-1.4/ once the volume is live. This field name is unknown to BibTeX and is silently ignored at build time.",
    abstract      = "Code documentation plays a vital role in enhancing collaboration, maintainability, and comprehension throughout the software development lifecycle. This becomes especially critical in legacy codebases, where missing or outdated comments hinder effective debugging and onboarding. Among documentation types, inline comments are particularly valuable for conveying program logic and supporting code reuse. With the growing capabilities of large language models (LLMs), their application to tasks such as code understanding and summarization has gained significant attention in the NLP community. However, the specific task of generating high-quality inline code comments using LLMs remains relatively under-explored. In this work, we conduct a systematic evaluation of several state-of-the-art LLMs to assess their effectiveness in producing meaningful and context-aware inline documentation. To this end, we curate a dataset of well-documented code snippets and propose a fine-grained evaluation framework that assesses both the quality and sufficiency of generated comments at the statement level. We further investigate the impact of prompting strategies and offer a comparative analysis across a range of models, including large foundational LLMs to smaller, code-specialized variants, within the domain of inline code documentation. Our findings offer actionable insights that can guide the development of effective and scalable systems for automated inline code documentation."
}

Markdown (Informal)
[Reliable Inline Code Documentation with LLMs: Fine-Grained Evaluation of Comment Quality and Coverage](https://preview.aclanthology.org/ingest-ijcnlp-aacl/2025.eval4nlp-1.4/) (Patil et al., Eval4NLP 2025)
ACL