% Conference paper (LREC 2026, main volume), so typed as inproceedings with the
% venue in booktitle rather than as a journal article.
% anthology-volume is a non-standard field that styles ignore; it keeps the
% source's volume identifier ("main") without printing it as a volume number.
% NOTE(review): the url points at a preview ingest path -- confirm the
% permanent address once the proceedings are published.
@inproceedings{talukder-shahariar-2026-bangla,
  author           = {Talukder, Tonmoy and
                      Shahariar, G M},
  title            = {{Bangla} {Key2Text}: Text Generation from Keywords for a Low Resource Language},
  editor           = {Piperidis, Stelios and
                      Bel, N{\'u}ria and
                      van den Heuvel, Henk and
                      Ide, Nancy and
                      Krek, Simon and
                      Toral, Antonio},
  booktitle        = {International Conference on Language Resources and Evaluation},
  anthology-volume = {main},
  month            = may,
  year             = {2026},
  address          = {Palma de Mallorca, Spain},
  publisher        = {ELRA Language Resource Association},
  pages            = {3805--3822},
  url              = {https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.303/},
  abstract         = {This paper introduces Bangla Key2Text, a large-scale dataset of 2.6 million Bangla keyword-text pairs designed for keyword-driven text generation in a low-resource language. The dataset is constructed using a BERT-based keyword extraction pipeline applied to millions of Bangla news texts, transforming raw articles into structured keyword-text pairs suitable for supervised learning. To establish baseline performance on this new benchmark, we fine-tune two sequence-to-sequence models, mT5 and BanglaT5, and evaluate them using multiple automatic metrics and human judgments. Experimental results show that task-specific fine-tuning substantially improves keyword-conditioned text generation in Bangla compared to zero-shot large language models. The dataset, trained models, and code are publicly released to support future research in Bangla natural language generation and keyword-to-text generation tasks.},
}
Markdown (Informal)
[Bangla Key2Text: Text Generation from Keywords for a Low Resource Language](https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.303/) (Talukder & Shahariar, LREC 2026)
ACL