% FreeChunker -- conference paper in Findings of ACL 2026 (ACL Anthology record).
% The url field uses the canonical Anthology address built from the Anthology ID
% (2026.findings-acl.730) instead of the preview/ingest staging address.
% NOTE(review): the first author is stored as "Wenxuan, Zhang" (family name
% "Wenxuan"). Compared with the other authors this may be a swapped
% family/given name -- confirm against the paper. Left unchanged here because
% the citation key is derived from it.
% NOTE(review): address appears to hold the conference venue rather than the
% publisher's city; kept as exported -- confirm the convention for this file.
@inproceedings{wenxuan-etal-2026-freechunker,
  title     = {{FreeChunker}: A Cross-Granularity Chunking Framework},
  author    = {Wenxuan, Zhang and
               Jiang, Yuan-Hao and
               Cao, Yang and
               Wu, Yonghe},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.findings-acl.730/},
  pages     = {14853--14865},
  isbn      = {979-8-89176-395-1},
  abstract  = {Chunking strategies significantly impact the effectiveness of Retrieval-Augmented Generation (RAG) systems. Existing methods operate within fixed-granularity paradigms that rely on static boundary identification, limiting their adaptability to diverse query requirements. This paper presents FreeChunker, a Cross-Granularity Encoding Framework that fundamentally transforms the traditional chunking paradigm: the framework treats sentences as atomic units and shifts from static chunk segmentation to flexible retrieval supporting arbitrary sentence combinations. This paradigm shift not only significantly avoids the computational overhead required for semantic boundary detection, but also enhances adaptability to complex queries. Experimental evaluation on LongBench V2 demonstrates that FreeChunker possesses significant advantages in both retrieval performance and time efficiency compared to existing chunking methods. The pre-trained models and codes are available at https://github.com/mazehart/FreeChunker.},
}
Markdown (Informal)
[FreeChunker: A Cross-Granularity Chunking Framework](https://aclanthology.org/2026.findings-acl.730/) (Wenxuan et al., Findings 2026)
ACL
- Zhang Wenxuan, Yuan-Hao Jiang, Yang Cao, and Yonghe Wu. 2026. FreeChunker: A Cross-Granularity Chunking Framework. In Findings of the Association for Computational Linguistics: ACL 2026, pages 14853–14865, San Diego, California, United States. Association for Computational Linguistics.