% Conference paper (INLG 2025) introducing the Live Football Commentary (LFC) dataset.
% The url field points at the permanent Anthology page rather than an ingestion preview.
% Non-ASCII editor names are stored as UTF-8 throughout.
@inproceedings{someya-etal-2025-live,
  author    = {Someya, Taiga and
               Ishigaki, Tatsuya and
               Takamura, Hiroya},
  title     = {Live Football Commentary ({LFC}): A {Large-Scale} Dataset for Building Football Commentary Generation Models},
  editor    = {Flek, Lucie and
               Narayan, Shashi and
               Phương, Lê Hồng and
               Pei, Jiahuan},
  booktitle = {Proceedings of the 18th International Natural Language Generation Conference},
  month     = oct,
  year      = {2025},
  address   = {Hanoi, Vietnam},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.inlg-main.13/},
  pages     = {195--201},
  abstract  = {Live football commentary brings the atmosphere and excitement of matches to fans in real time, but producing it requires costly professional announcers. We address this challenge by formulating commentary generation from player- and ball-tracking coordinates as a new language-generation task. To facilitate research on this problem we compile the \textit{Live Football Commentary (LFC)} dataset, 12,440 time-stamped Japanese utterances aligned with tracking data for 40 J1 League matches (60 h). We benchmark three LLM-based baselines that receive the tracking data (i) as plain text, (ii) as pitch-map images, or (iii) in both modalities. Human evaluation shows that the text encoding already outperforms image and multimodal variants in both accuracy and relevance, indicating that current LLMs exploit structured coordinates more effectively than raw visuals. We release the LFC transcripts and evaluation code to establish a public test bed and spur future work on tracking-based commentary generation, saliency detection, and cross-modal integration.},
}
Markdown (Informal)
[Live Football Commentary (LFC): A Large‐Scale Dataset for Building Football Commentary Generation Models](https://aclanthology.org/2025.inlg-main.13/) (Someya et al., INLG 2025)
ACL