% BEA 2026 workshop paper (ACL Anthology ID 2026.bea-1.34).
% The url field uses the canonical Anthology permalink; the staging
% "preview" host it was exported from is an ingestion-branch preview
% and is not a stable address.
% NOTE(review): editor "Tack, Anais" may need a diaeresis, Ana{\"i}s;
% confirm against the published proceedings front matter.
% NOTE(review): address holds the conference venue, following the
% Anthology export convention, not the publisher's city.
@inproceedings{pilka-etal-2026-metrics,
  title     = {From Metrics to Meaning: Rule-Grounded {LLM} Explanations for Data Literacy in the Case of Youth Football},
  author    = {Pi{\l}ka, Tomasz and
               Kuczy{\'n}ski, Tomasz and
               Czajka, Mateusz},
  editor    = {Kochmar, Ekaterina and
               Alhafni, Bashar and
               Bann{\`o}, Stefano and
               Bexte, Marie and
               Burstein, Jill and
               Horbach, Andrea and
               Laarmann-Quante, Ronja and
               Tack, Anais and
               Yaneva, Victoria and
               Yuan, Zheng},
  booktitle = {Proceedings of the 21st Workshop on Innovative Use of {NLP} for Building Educational Applications ({BEA} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.bea-1.34/},
  pages     = {492--502},
  isbn      = {979-8-89176-409-5},
  abstract  = {Young athletes, parents, and coaches are increasingly exposed to training metrics from wearable technology, yet such metrics are difficult to interpret without contextual explanation. We present a rule-grounded data-to-text framework for supporting data literacy in youth football through concise, stakeholder-specific summaries of training sessions. A rule layer maps duration-normalised indicators to structured facts about session profile, internal intensity, speed exposure, and movement dynamics, which are then verbalised by a large language model for coaches, parents, or players. We compare direct generation from raw metrics, generation from rule-derived facts, and an augmented rule-grounded configuration, ENRICHED, that supplements validated facts with raw metrics and explicit threshold definitions. In this setting, selected open-weight models are additionally adapted using LoRA. The framework is developed using 122 anonymised player-session records from a U15 environment and evaluated on a held-out subset of ten sessions with stakeholder-oriented reference summaries. The results indicate that rule grounding improves reliability and audience adaptation compared with direct generation from raw metrics, particularly by reducing unsupported or overly strong interpretations. A school-based expert evaluation with physical education teachers further suggests that player-facing explanations in the evaluated ENRICHED setting can remain accurate, comprehensible, and practically useful. We position the framework as an interpretable data-literacy support interface for youth sport analytics.},
}