% Conference paper entry (ACL Anthology ID 2026.acl-industry.4, per the url field).
@inproceedings{kalohia-2026-leveraging,
    title = "Leveraging Generative {AI} for Extracting Business Requirements from Legacy {COBOL} and {PL}/{I} Code",
    author = "Kalohia, Ankur",
    editor = "Li, Yunyao and
      Rehm, Georg and
      Tu, Mei",
    booktitle = "Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics ({ACL} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-acl/2026.acl-industry.4/",
    pages = "43--55",
    ISBN = "979-8-89176-394-4",
    abstract = "Recovering business requirements from COBOL and PL/I portfolios is difficult because logic is scattered across interdependent programs and data definitions, and existing analyses seldom yield stakeholder-facing artifacts. We introduce an LLM-augmented reverse-engineering pipeline that provides deterministic parsing, schema-constrained LLM generation with bidirectional traceability to code. It couples grammar-based parsing and control-flow and data-flow analysis with a large language model to translate an enriched intermediate representation into structured specifications. This is not raw-code prompting or generic summarization, the novelty is the LLM-centered generation over an enriched IR, with structured JSON outputs and traceability for compliance-sensitive settings. The pipeline produces business requirements documents, explicit rule catalogs, end-to-end data lineage, create{--}read{--}update{--}delete matrices, and field-level source-to-target mappings, each linked to the supporting code. In a financial industry setting, containing 3.4M+ LoC including comments / 3.2M excluding comments of COBOL, the system achieves 93{\%} agreement with expert-authored business rules and reduces documentation effort by approximately 70{\%}, as measured against manually produced requirement documents and rule sets. On the internal corpus spanning 3.4M lines across online, batch, and job control workloads, the approach yields approximately 3.2{--}3.3{\texttimes} faster analysis while improving artifact consistency and traceability."
}
Markdown (Informal)
[Leveraging Generative AI for Extracting Business Requirements from Legacy COBOL and PL/I Code](https://preview.aclanthology.org/ingest-acl/2026.acl-industry.4/) (Kalohia, ACL 2026)
ACL