% Conference paper by Kister and Schirmer in the NLP4DH 2026 proceedings.
% NOTE(review): the url field points at a "preview ... ingest" host, which is
% presumably temporary -- confirm the canonical URL before relying on it.
@inproceedings{kister-schirmer-2026-evaluating,
  title     = {Evaluating Open-Source {LLM}s for Text Summarization and Named Entity Recognition in Long, Unstructured Text},
  author    = {Kister, Pauline and
               Schirmer, Miriam},
  editor    = {Hamilton, Sil and
               {\"O}hman, Emily and
               Hicke, Rebecca M. M. and
               Bizzoni, Yuri and
               Bax, Axel and
               Matthews, Jacob A. and
               H{\"a}m{\"a}l{\"a}inen, Mika},
  booktitle = {Proceedings of the 6th International Conference on Natural Language Processing for the Digital Humanities},
  month     = jul,
  year      = {2026},
  address   = {San Diego, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.nlp4dh-1.35/},
  pages     = {390--410},
  isbn      = {979-8-89176-427-9},
  abstract  = {This work investigates the extent to which open-source Large Language Models (LLMs) can improve accessibility of unstructured historical documents by performing abstractive summarization and fine-grained Named Entity Recognition (NER) for role classification and violation types. We evaluate open-source LLMs in zero-shot settings and apply these tasks to witness testimonies collected by the South African Truth and Reconciliation Commission (TRC), which archived a large body of text documenting human rights violations during apartheid. Despite their historical significance, these texts are difficult to access due to their length, lack of standardized structure, and the absence of systematic indexing. Open-source LLMs show strong performance in summarization, with most models surpassing non-LLM baselines (maximum BERTScore 0.77), while NER performance remains limited (maximum F1-score 0.61). Results suggest a trade-off in which stylistic fluency is prioritized over factual precision. A two-stage pipeline, summarization followed by NER on LLM summaries, leads to measurable improvements.},
}

Markdown (Informal)
[Evaluating Open-Source LLMs for Text Summarization and Named Entity Recognition in Long, Unstructured Text](https://preview.aclanthology.org/ingest-acl-workshops/2026.nlp4dh-1.35/) (Kister & Schirmer, NLP4DH 2026)
ACL