@comment{
  maurer-2026-elfen: system demonstration paper introducing the elfen Python
  package, published in the EACL 2026 proceedings (Volume 3: System
  Demonstrations).
  NOTE(review): the url field points at the ACL Anthology preview/ingest host;
  confirm and switch to the canonical Anthology URL once it is live.
}
@inproceedings{maurer-2026-elfen,
  title     = {{elfen}: A {P}ython Package for Efficient Linguistic Feature Extraction for Natural Language Datasets},
  author    = {Maurer, Maximilian},
  editor    = {Croce, Danilo and Leidner, Jochen and Moosavi, Nafise Sadat},
  booktitle = {Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 3: System Demonstrations)},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-eacl/2026.eacl-demo.5/},
  pages     = {61--74},
  isbn      = {979-8-89176-382-1},
  abstract  = {A detailed understanding of the basic properties of text collections produced by humans or generated synthetically is vital for all steps of the natural language processing system life cycle, from training to evaluating model performance and synthetic texts. To facilitate the analysis of these properties, we introduce elfen, a Python library for efficient linguistic feature extraction for text datasets. It includes the largest set of item-level linguistic features in eleven feature areas: surface-level, POS, lexical richness, readability, named entity, semantic, information-theoretic, emotion, psycholinguistic, dependency, and morphological features. Building on top of popular NLP and modern dataframe libraries, elfen enables feature extraction in various languages (80 at the moment) on thousands of items, even given limited computing resources. We show how using elfen enables linguistically informed data selection, outlier detection, and text collection comparison. We release elfen as an open-source PyPI package, accompanied by extensive documentation, including tutorials. We host the code at https://github.com/mmmaurer/elfen/, make it available through the GESIS Methods Hub at https://methodshub.gesis.org/library/methods/elfen/, and provide documentation and tutorials at https://elfen.readthedocs.io/en/latest/.},
}
Markdown (Informal)
[elfen: A Python Package for Efficient Linguistic Feature Extraction for Natural Language Datasets](https://preview.aclanthology.org/ingest-eacl/2026.eacl-demo.5/) (Maurer, EACL 2026)
ACL