@inproceedings{de-kuthy-etal-2025-automatic,
title = "Automatic concept extraction for learning domain modeling: A weakly supervised approach using contextualized word embeddings",
author = "De Kuthy, Kordula and
Girrbach, Leander and
Meurers, Detmar",
editor = {Kochmar, Ekaterina and
Alhafni, Bashar and
Bexte, Marie and
Burstein, Jill and
Horbach, Andrea and
Laarmann-Quante, Ronja and
Tack, Ana{\"i}s and
Yaneva, Victoria and
Yuan, Zheng},
booktitle = "Proceedings of the 20th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2025)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/acl25-workshop-ingestion/2025.bea-1.13/",
pages = "175--185",
ISBN = "979-8-89176-270-1",
abstract = "Heterogeneity in student populations poses achallenge in formal education, with adaptivetextbooks offering a potential solution by tai-loring content based on individual learner mod-els. However, creating domain models for text-books typically demands significant manual ef-fort. Recent work by Chau et al. (2021) demon-strated automated concept extraction from dig-ital textbooks, but relied on costly domain-specific manual annotations. This paper in-troduces a novel, scalable method that mini-mizes manual effort by combining contextu-alized word embeddings with weakly super-vised machine learning. Our approach clustersword embeddings from textbooks and identi-fies domain-specific concepts using a machinelearner trained on concept seeds automaticallyextracted from Wikipedia. We evaluate thismethod using 28 economics textbooks, com-paring its performance against a tf-idf baseline,a supervised machine learning baseline, theRAKE keyword extraction method, and humandomain experts. Results demonstrate that ourweakly supervised method effectively balancesaccuracy with reduced annotation effort, offer-ing a practical solution for automated conceptextraction in adaptive learning environments."
}
Markdown (Informal)
[Automatic concept extraction for learning domain modeling: A weakly supervised approach using contextualized word embeddings](https://preview.aclanthology.org/acl25-workshop-ingestion/2025.bea-1.13/) (De Kuthy et al., BEA 2025)
ACL