@comment{ACL 2022 long paper (Anthology ID 2022.acl-long.283). The url field uses the canonical ACL Anthology address rather than a temporary preview-branch link.}
@inproceedings{alhama-2022-word,
  author    = {Alhama, Raquel G.},
  title     = {Word Segmentation as Unsupervised Constituency Parsing},
  editor    = {Muresan, Smaranda and
               Nakov, Preslav and
               Villavicencio, Aline},
  booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = may,
  year      = {2022},
  address   = {Dublin, Ireland},
  publisher = {Association for Computational Linguistics},
  pages     = {4103--4112},
  doi       = {10.18653/v1/2022.acl-long.283},
  url       = {https://aclanthology.org/2022.acl-long.283/},
  abstract  = {Word identification from continuous input is typically viewed as a segmentation task. Experiments with human adults suggest that familiarity with syntactic structures in their native language also influences word identification in artificial languages; however, the relation between syntactic processing and word identification is yet unclear. This work takes one step forward by exploring a radically different approach of word identification, in which segmentation of a continuous input is viewed as a process isomorphic to unsupervised constituency parsing. Besides formalizing the approach, this study reports simulations of human experiments with DIORA (Drozdov et al., 2020), a neural unsupervised constituency parser. Results show that this model can reproduce human behavior in word identification experiments, suggesting that this is a viable approach to study word identification and its relation to syntactic processing.},
}
Markdown (Informal)
[Word Segmentation as Unsupervised Constituency Parsing](https://aclanthology.org/2022.acl-long.283/) (Alhama, ACL 2022)
ACL
- Raquel G. Alhama. 2022. Word Segmentation as Unsupervised Constituency Parsing. In Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 4103–4112, Dublin, Ireland. Association for Computational Linguistics.