% EMNLP 2025 main-conference paper (ACL Anthology ID 2025.emnlp-main.533).
% The url field uses the canonical Anthology address rather than the
% temporary "preview.../ingest-emnlp" staging address, which is not stable.
@inproceedings{lee-etal-2025-ciflex,
  title     = {{CIFLEX}: Contextual Instruction Flow for Sub-task Execution in Multi-Turn Interactions with a Single On-Device {LLM}},
  author    = {Lee, Juntae and
               Bang, Jihwan and
               Yang, Seunghan and
               Chang, Simyung},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.emnlp-main.533/},
  pages     = {10556--10570},
  isbn      = {979-8-89176-332-6},
  abstract  = {We present CIFLEX (Contextual Instruction FLow with EXecution), a novel execution system for efficient sub-task handling in multi-turn interactions with a single on-device large language model (LLM). As LLMs become increasingly capable, a single model is expected to handle diverse sub-tasks that more effectively and comprehensively support answering user requests. Naive approach reprocesses the entire conversation context when switching between main and sub-tasks (e.g., query rewriting, summarization), incurring significant computational overhead. CIFLEX mitigates this overhead by reusing the key-value (KV) cache from the main task and injecting only task-specific instructions into isolated side paths. After sub-task execution, the model rolls back to the main path via cached context, thereby avoiding redundant prefill computation. To support sub-task selection, we also develop a hierarchical classification strategy tailored for small-scale models, decomposing multi-choice decisions into binary ones. Experiments show that CIFLEX significantly reduces computational costs without degrading task performance, enabling scalable and efficient multi-task dialogue on-device.},
}
Markdown (Informal)
[CIFLEX: Contextual Instruction Flow for Sub-task Execution in Multi-Turn Interactions with a Single On-Device LLM](https://aclanthology.org/2025.emnlp-main.533/) (Lee et al., EMNLP 2025)
ACL