% LREC 2014 conference paper; ACL Anthology ID L14-1547.
@inproceedings{ghayoomi-etal-2014-constituency,
    title = "Constituency Parsing of {Bulgarian}: Word- vs Class-based Parsing",
    author = "Ghayoomi, Masood and
      Simov, Kiril and
      Osenova, Petya",
    editor = "Calzolari, Nicoletta and
      Choukri, Khalid and
      Declerck, Thierry and
      Loftsson, Hrafn and
      Maegaard, Bente and
      Mariani, Joseph and
      Moreno, Asuncion and
      Odijk, Jan and
      Piperidis, Stelios",
    booktitle = "Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)",
    month = may,
    year = "2014",
    address = "Reykjavik, Iceland",
    publisher = "European Language Resources Association (ELRA)",
    url = "https://aclanthology.org/L14-1547/",
    pages = "4056--4060",
    abstract = "In this paper, we report the obtained results of two constituency parsers trained with BulTreeBank, an HPSG-based treebank for Bulgarian. To reduce the data sparsity problem, we propose using the Brown word clustering to do an off-line clustering and map the words in the treebank to create a class-based treebank. The observations show that when the classes outnumber the POS tags, the results are better. Since this approach adds on another dimension of abstraction (in comparison to the lemma), its coarse-grained representation can be used further for training statistical parsers."
}
Markdown (Informal)
[Constituency Parsing of Bulgarian: Word- vs Class-based Parsing](https://aclanthology.org/L14-1547/) (Ghayoomi et al., LREC 2014)
ACL