@inproceedings{dou-etal-2024-stepcoder, title = "{S}tep{C}oder: Improving Code Generation with Reinforcement Learning from Compiler Feedback", author = "Dou, Shihan and Liu, Yan and Jia, Haoxiang and Zhou, Enyu and Xiong, Limao and Shan, Junjie and Huang, Caishuang and Wang, Xiao and Fan, Xiaoran and Xi, Zhiheng and Zhou, Yuhao and Ji, Tao and Zheng, Rui and Zhang, Qi and Gui, Tao and Huang, Xuanjing", editor = "Ku, Lun-Wei and Martins, Andre and Srikumar, Vivek", booktitle = "Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)", month = aug, year = "2024", address = "Bangkok, Thailand", publisher = "Association for Computational Linguistics", url = "https://preview.aclanthology.org/fix-sig-urls/2024.acl-long.251/", doi = "10.18653/v1/2024.acl-long.251", pages = "4571--4585" }