@comment{Conference-paper entry for Findings of ACL 2026 (Jung and Jung). NOTE(review): the url field points at a preview.aclanthology.org "ingest-acl" path, presumably a staging location -- confirm the canonical URL before relying on it.}
@inproceedings{jung-jung-2026-empirical,
  title     = {Empirical Analysis of Task Mixture Effects in Small-scale Instruction Tuning: A Statistical Approach},
  author    = {Jung, Jeesu and Jung, Sangkeun},
  editor    = {Liakata, Maria and Moreira, Viviane P. and Zhang, Jiajun and Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.643/},
  pages     = {13168--13186},
  isbn      = {979-8-89176-395-1},
  abstract  = {The performance of large language models heavily depends on instruction tuning, especially on task types and mixture ratios. However, previous research has primarily focused on mixing tasks at fixed ratios, lacking a systematic and quantitative analysis of task-wise interactions across diverse tasks. Moreover, it has relied heavily on human labeling. To address these limitations, this study conducts empirical experiments on unlabeled instruction corpora, varying both the number and proportion of task combinations to identify effective mixtures. To minimize manual labeling, we automatically extract five representative tasks{---}programming, math problem solving, history question answering, grammar correction, and creative writing{---}using only a few seed instructions. Across 51 mixtures, we find that 1{--}2 task mixtures work best with small datasets, while synergistic 3-task mixtures excel with larger data. Task interactions reveal both synergy (e.g., programming + math) and interference (e.g., programming + creative writing). These results provide practical guidelines for mixture design tailored to model scale and data size.},
}
Markdown (Informal)
[Empirical Analysis of Task Mixture Effects in Small-scale Instruction Tuning: A Statistical Approach](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.643/) (Jung & Jung, Findings 2026)
ACL