@comment{BEA 2025 workshop paper: EduCSW, a Mandarin-English code-switched text generation pipeline for computer science education.}
@inproceedings{chen-zhao-2025-educsw,
  author    = {Chen, Ruishi and
               Zhao, Yiling},
  title     = {{EduCSW}: Building a {Mandarin}-{English} Code-Switched Generation Pipeline for Computer Science Learning},
  editor    = {Kochmar, Ekaterina and
               Alhafni, Bashar and
               Bexte, Marie and
               Burstein, Jill and
               Horbach, Andrea and
               Laarmann-Quante, Ronja and
               Tack, Ana{\"i}s and
               Yaneva, Victoria and
               Yuan, Zheng},
  booktitle = {Proceedings of the 20th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2025)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  pages     = {908--919},
  isbn      = {979-8-89176-270-1},
  url       = {https://aclanthology.org/2025.bea-1.68/},
  abstract  = {This paper presents EduCSW, a novel pipeline for generating Mandarin-English code-switched text to support AI-powered educational tools that adapt computer science instruction to learners' language proficiency through mixed-language delivery. To address the scarcity of code-mixed datasets, we propose an encoder-decoder architecture that generates natural code-switched text using only minimal existing code-mixed examples and parallel corpora. Evaluated on a corpus curated for computer science education, human annotators rated 60{--}64{\%} of our model{'}s outputs as natural, significantly outperforming both a baseline fine-tuned neural machine translation (NMT) model (22{--}24{\%}) and the DeepSeek-R1 model (34{--}44{\%}). The generated text achieves a Code-Mixing Index (CMI) of 25.28{\%}, aligning with patterns observed in spontaneous Mandarin-English code-switching. Designed to be generalizable across language pairs and domains, this pipeline lays the groundwork for generating training data to support the development of educational tools with dynamic code-switching capabilities.},
}
Markdown (Informal)
[EduCSW: Building a Mandarin-English Code-Switched Generation Pipeline for Computer Science Learning](https://aclanthology.org/2025.bea-1.68/) (Chen & Zhao, BEA 2025)
ACL