% CodeRise (Wen et al.), Findings of ACL 2026.
% NOTE(review): url points at a preview/ingest host (preview.aclanthology.org/ingest-acl);
% confirm the canonical URL before relying on it.
@inproceedings{wen-etal-2026-coderise,
  title     = {{CodeRise}: Bootstrapping {LLMs} for Ultra Low-Resource Programming Languages via Progressive Self-Refinement Curriculum},
  author    = {Wen, Tengfei and
               Chen, Xuanang and
               He, Ben and
               Cong, Xiaoliang and
               Sun, Le},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1840/},
  pages     = {36929--36942},
  isbn      = {979-8-89176-395-1},
  abstract  = {Large Language Models (LLMs) struggle with code generation for Ultra Low-Resource Programming Languages (ULRPLs) due to the scarcity of training data. Existing synthetic data generation methods fail in this context, suffering from a severe cold-start problem and resulting in samples that lack diversity. To overcome these challenges, we propose CodeRise, a novel two-stage framework that autonomously generates a high-quality, diverse, and progressively complex curriculum for ULRPLs. The framework first tackles the cold-start and distribution issues by leveraging the full formal syntax of the target language as structural guidance and applying a biased sampling strategy over library modules. Building on this foundation, we fine-tune the model to generate increasingly complex code without explicit syntax input, using an adaptive curriculum and multi-turn self-debugging to progressively improve code quality. We evaluate on two ULRPLs, Tengo and Janet, using migrated HumanEval-Tengo and MBPP-Tengo, as well as our new benchmarks, TengoEval and JanetEval. Experiments show that CodeRise significantly outperforms both training-free and training-based baselines in ultra low-resource environments.},
}
Markdown (Informal)
[CodeRise: Bootstrapping LLMs for Ultra Low-Resource Programming Languages via Progressive Self-Refinement Curriculum](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.1840/) (Wen et al., Findings 2026)
ACL