@inproceedings{kazemi-vanhari-etal-2025-analyzing,
title = "Analyzing Interview Questions via Bloom{'}s Taxonomy to Enhance the Design Thinking Process",
author = "Kazemi Vanhari, Fatemeh and
Anand, Christopher and
Welch, Charles",
editor = {Kochmar, Ekaterina and
Alhafni, Bashar and
Bexte, Marie and
Burstein, Jill and
Horbach, Andrea and
Laarmann-Quante, Ronja and
Tack, Ana{\"i}s and
Yaneva, Victoria and
Yuan, Zheng},
booktitle = "Proceedings of the 20th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2025)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/acl25-workshop-ingestion/2025.bea-1.42/",
pages = "582--593",
ISBN = "979-8-89176-270-1",
abstract = "Interviews are central to the Empathy phase of Design Thinking, helping designers uncover user needs and experience. Although interviews are widely used to support human centered innovation, evaluating their quality, especially from a cognitive perspective, remains underexplored. This study introduces a structured framework for evaluating interview quality in the context of Design Thinking, using Bloom{'}s Taxonomy as a foundation. We propose the Cognitive Interview Quality Score, a composite metric that integrates three dimensions: Effectiveness Score, Bloom Coverage Score, and Distribution Balance Score. Using human-annotations, we assessed 15 interviews across three domains to measure cognitive diversity and structure. We compared CIQS-based rankings with human experts and found that the Bloom Coverage Score aligned more closely with expert judgments. We evaluated the performance of LMA-3-8B-Instruct and GPT-4o-mini, using zero-shot, few-shot, and chain-of-thought prompting, finding GPT-4o-mini, especially in zero-shot mode, showed the highest correlation with human annotations in all domains. Error analysis revealed that models struggled more with mid-level cognitive tasks (e.g., Apply, Analyze) and performed better on Create, likely due to clearer linguistic cues. These findings highlight both the promise and limitations of using NLP models for automated cognitive classification and underscore the importance of combining cognitive metrics with qualitative insights to comprehensively assess interview quality."
}
Markdown (Informal)
[Analyzing Interview Questions via Bloom’s Taxonomy to Enhance the Design Thinking Process](https://preview.aclanthology.org/acl25-workshop-ingestion/2025.bea-1.42/) (Kazemi Vanhari et al., BEA 2025)
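For a concrete sense of how a composite score like the CIQS described in the abstract might be computed, here is a minimal Python sketch. The abstract does not give the component formulas or weights, so the definitions below are assumptions for illustration only: coverage as the fraction of the six Bloom levels present, balance as normalized Shannon entropy over the level distribution, and an equal-weight average of the three components; they are not the authors' method.

```python
# Hypothetical sketch of a CIQS-style composite score.
# Component definitions and equal weighting are assumptions, not taken from the paper.
from collections import Counter
from math import log

BLOOM_LEVELS = ["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"]

def bloom_coverage_score(labels: list[str]) -> float:
    """Fraction of the six Bloom levels that appear at least once."""
    return len(set(labels) & set(BLOOM_LEVELS)) / len(BLOOM_LEVELS)

def distribution_balance_score(labels: list[str]) -> float:
    """Normalized Shannon entropy of the Bloom-level distribution (1 = perfectly balanced)."""
    counts = Counter(labels)
    total = sum(counts.values())
    if total == 0 or len(counts) < 2:
        return 0.0
    entropy = -sum((c / total) * log(c / total) for c in counts.values())
    return entropy / log(len(BLOOM_LEVELS))

def ciqs(effectiveness: float, labels: list[str]) -> float:
    """Equal-weight average of the three components (weighting is assumed)."""
    return (effectiveness + bloom_coverage_score(labels) + distribution_balance_score(labels)) / 3

# Example: ten interview questions annotated with Bloom levels
labels = ["Remember", "Understand", "Understand", "Apply", "Analyze",
          "Analyze", "Evaluate", "Create", "Create", "Remember"]
print(round(ciqs(effectiveness=0.8, labels=labels), 3))
```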