@comment{
  Findings of ACL 2026 paper by Tongxi Wang on the Fovea-Block-Skip (FBS) Transformer.
  NOTE(review): the url field points at a preview.aclanthology.org ingest host,
  which looks like a staging address -- confirm the canonical URL (and add a doi
  if one exists) before relying on this entry.
  NOTE(review): address holds the conference location, as exported -- some styles
  expect the publisher's city here; confirm against the target style.
}
@inproceedings{wang-2026-fbs,
  title     = {{FBS}: Modeling Native Parallel Reading inside a Transformer},
  author    = {Wang, Tongxi},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.findings-acl.200/},
  pages     = {4106--4137},
  isbn      = {979-8-89176-395-1},
  abstract  = {Large language models (LLMs) excel across many tasks, yet inference is still dominated by strictly token-by-token autoregression. Existing acceleration methods largely patch this pipeline and miss core human-reading ingredients: content-adaptive foresight, chunk-structure-aware compute allocation, and train{--}test consistency for preview/skimming. We propose the Fovea{--}Block{--}Skip Transformer (FBS), which injects a causal, trainable loop into Transformers via Parafovea-Attention Window (PAW), Chunk-Head (CH), and Skip-Gate (SG). Across diverse benchmarks, FBS improves the quality-efficiency trade-off without increasing parameters, and ablations show the three modules are complementary.},
}
Markdown (Informal)
[FBS: Modeling Native Parallel Reading inside a Transformer](https://preview.aclanthology.org/ingest-acl/2026.findings-acl.200/) (Wang, Findings 2026)
ACL