% LREC-COLING 2024 conference paper; url is the permanent ACL Anthology address rather than a preview-build link.
@inproceedings{dakota-kubler-2024-bits,
  title     = {Bits and Pieces: Investigating the Effects of Subwords in Multi-task Parsing across Languages and Domains},
  author    = {Dakota, Daniel and
               K{\"u}bler, Sandra},
  editor    = {Calzolari, Nicoletta and
               Kan, Min-Yen and
               Hoste, Veronique and
               Lenci, Alessandro and
               Sakti, Sakriani and
               Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation ({LREC-COLING} 2024)},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  url       = {https://aclanthology.org/2024.lrec-main.215/},
  pages     = {2397--2409},
  abstract  = {Neural parsing is very dependent on the underlying language model. However, very little is known about how choices in the language model affect parsing performance, especially in multi-task learning. We investigate questions on how the choice of subwords affects parsing, how subword sharing is responsible for gains or negative transfer in a multi-task setting where each task is parsing of a specific domain of the same language. More specifically, we investigate these issues across four languages: English, German, Italian, and Turkish. We find a general preference for averaged or last subwords across languages and domains. However, specific POS tags may require different subwords, and the distributional overlap between subwords across domains is perhaps a more influential factor in determining positive or negative transfer than discrepancies in the data sizes.},
}
Markdown (Informal)
[Bits and Pieces: Investigating the Effects of Subwords in Multi-task Parsing across Languages and Domains](https://aclanthology.org/2024.lrec-main.215/) (Dakota & Kübler, LREC-COLING 2024)
ACL