@inproceedings{fadlon-bar-2026-much,
title = "How Much Pretraining Does Structured Data Need?",
author = "Fadlon, Daniel and
Bar, Kfir",
editor = "Demberg, Vera and
Inui, Kentaro and
Marquez, Llu{\'i}s",
booktitle = "Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingest-eacl/2026.eacl-long.154/",
pages = "3352--3365",
ISBN = "979-8-89176-380-7",
abstract = "Large language models (LLMs) are increasingly adopted for handling structured data, including tabular and relational inputs, despite mostly being pretrained on unstructured text. This raises a key question: how effectively do pretrained representations from language-focused LLMs transfer to tasks involving structured inputs? We address this through controlled experiments using two small open-source LLMs, systematically re-initializing subsets of layers with random weights before fine-tuning on structured datasets and comparing results to unstructured datasets. Our analyses show that, for structured data, most pretrained depth contributes little, with performance often saturating after the first few layers, whereas unstructured tasks benefit more consistently from deeper pretrained representations. Pretraining remains useful mainly in low-resource settings, with its impact diminishing as more training data becomes available."
}

Markdown (Informal)

[How Much Pretraining Does Structured Data Need?](https://preview.aclanthology.org/ingest-eacl/2026.eacl-long.154/) (Fadlon & Bar, EACL 2026)

ACL

Daniel Fadlon and Kfir Bar. 2026. How Much Pretraining Does Structured Data Need?. In Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers), pages 3352–3365, Rabat, Morocco. Association for Computational Linguistics.
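The abstract describes systematically re-initializing subsets of layers with random weights before fine-tuning. The snippet below is a minimal illustrative sketch of that kind of experiment, not the authors' released code: it assumes a HuggingFace-style decoder-only LM, and the checkpoint name, the choice of which layers to reset, and the init scale are all placeholder assumptions.

```python
# Illustrative sketch (NOT the authors' code): re-initialize a subset of
# transformer blocks of a small causal LM with random weights before
# fine-tuning, in the spirit of the layer re-initialization experiments
# described in the abstract. Checkpoint name, the number of reset layers,
# and the init std are placeholder assumptions.
import torch
from transformers import AutoModelForCausalLM


def reinit_top_layers(model, num_layers_to_reset, init_std=0.02):
    """Randomly re-initialize the last `num_layers_to_reset` decoder blocks."""
    layers = model.model.layers  # decoder block list in Llama/Mistral-style models
    for layer in layers[len(layers) - num_layers_to_reset:]:
        for module in layer.modules():
            if isinstance(module, torch.nn.Linear):
                torch.nn.init.normal_(module.weight, mean=0.0, std=init_std)
                if module.bias is not None:
                    torch.nn.init.zeros_(module.bias)
    return model


# Placeholder checkpoint; the paper uses two small open-source LLMs.
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B")
model = reinit_top_layers(model, num_layers_to_reset=8)
# ...then fine-tune `model` on the structured (e.g., tabular) task as usual
# and compare against fine-tuning the fully pretrained model.
```

Which subset of layers is reset (and how many) is the experimental variable the paper sweeps over; the example above resets the top blocks only as one concrete instance.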