% CodeSCM (Gupta, Bhatt, Jana), NAACL 2025 long paper; ACL Anthology path 2025.naacl-long.345.
% The url field uses the canonical Anthology permalink, not a preview-branch link.
@inproceedings{gupta-etal-2025-codescm,
  author    = {Gupta, Mukur and
               Bhatt, Noopur and
               Jana, Suman},
  title     = {{CodeSCM}: Causal Analysis for Multi-Modal Code Generation},
  editor    = {Chiruzzo, Luis and
               Ritter, Alan and
               Wang, Lu},
  booktitle = {Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)},
  month     = apr,
  year      = {2025},
  address   = {Albuquerque, New Mexico},
  publisher = {Association for Computational Linguistics},
  pages     = {6779--6793},
  isbn      = {979-8-89176-189-6},
  url       = {https://aclanthology.org/2025.naacl-long.345/},
  abstract  = {In this paper, we propose CodeSCM, a Structural Causal Model (SCM) for analyzing multi-modal code generation using large language models (LLMs). By applying interventions to CodeSCM, we measure the causal effects of different prompt modalities, such as natural language, code, and input-output examples, on the model. CodeSCM introduces latent mediator variables to separate the code and natural language semantics of a multi-modal code generation prompt. Using the principles of Causal Mediation Analysis on these mediators we quantify direct effects representing the model's spurious leanings. We find that, in addition to natural language instructions, input-output examples significantly influence code generation.},
}
Markdown (Informal)
[CodeSCM: Causal Analysis for Multi-Modal Code Generation](https://aclanthology.org/2025.naacl-long.345/) (Gupta et al., NAACL 2025)
ACL
- Mukur Gupta, Noopur Bhatt, and Suman Jana. 2025. CodeSCM: Causal Analysis for Multi-Modal Code Generation. In Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), pages 6779–6793, Albuquerque, New Mexico. Association for Computational Linguistics.