% Workshop paper in the MRL 2025 proceedings (ACL Anthology ID 2025.mrl-main.28).
% The url field uses the stable Anthology landing page for that ID (the same ID
% that appears in the doi field) rather than a temporary preview-branch link.
@inproceedings{kokot-poelman-2025-type,
  title     = {Type and Complexity Signals in Multilingual Question Representations},
  author    = {Kokot, Robin and
               Poelman, Wessel},
  editor    = {Adelani, David Ifeoluwa and
               Arnett, Catherine and
               Ataman, Duygu and
               Chang, Tyler A. and
               Gonen, Hila and
               Raja, Rahul and
               Schmidt, Fabian and
               Stap, David and
               Wang, Jiayi},
  booktitle = {Proceedings of the 5th Workshop on Multilingual Representation Learning ({MRL} 2025)},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.mrl-main.28/},
  doi       = {10.18653/v1/2025.mrl-main.28},
  pages     = {411--425},
  isbn      = {979-8-89176-345-6},
  abstract  = {This work investigates how a multilingual transformer model represents morphosyntactic properties of questions. We introduce the Question Type and Complexity (QTC) dataset with sentences across seven languages, annotated with type information and complexity metrics including dependency length, tree depth, and lexical density. Our evaluation extends probing methods to regression labels with selectivity controls to quantify gains in generalizability. We compare layer-wise probes on frozen Glot500-m (Imani et al., 2023) representations against subword TF-IDF baselines, and a fine-tuned model. Results show that statistical features classify questions well in explicitly marked languages and structural complexity prediction, while neural probes lead on individual metrics. We use these results to evaluate when contextual representations outperform statistical baselines and whether parameter updates reduce availability of pre-trained linguistic information.},
}
Markdown (Informal)
[Type and Complexity Signals in Multilingual Question Representations](https://aclanthology.org/2025.mrl-main.28/) (Kokot & Poelman, MRL 2025)
ACL