% Bick (2006), LREC 2006 paper on converting Constraint Grammar parses into a
% PSG-style constituent treebank. ACL Anthology ID: L06-1269.
% The url field uses the canonical aclanthology.org address rather than a
% branch-preview host, and the booktitle uses a real apostrophe in "'06".
@inproceedings{bick-2006-turning,
    title = "Turning a Dependency Treebank into a {PSG}-style Constituent Treebank",
    author = "Bick, Eckhard",
    editor = "Calzolari, Nicoletta  and
      Choukri, Khalid  and
      Gangemi, Aldo  and
      Maegaard, Bente  and
      Mariani, Joseph  and
      Odijk, Jan  and
      Tapias, Daniel",
    booktitle = "Proceedings of the Fifth International Conference on Language Resources and Evaluation ({LREC}'06)",
    month = may,
    year = "2006",
    address = "Genoa, Italy",
    publisher = "European Language Resources Association (ELRA)",
    url = "https://aclanthology.org/L06-1269/",
    abstract = "In this paper, we present and evaluate a new method to convert Constraint Grammar (CG) parses of running text into Constituent Treebanks. The conversion is two-step - first a grammar-based method is used to bridge the gap between raw CG annotation and full dependency structure, then phrase structure bracketing and non-terminal nodes are introduced by clustering sister dependents, effectively building one syntactic treebank on top of another. The method is compared with another approach (Bick 2003-2), where constituent structures are arrived at by employing a function-tag based Phrase Structure Grammar (PSG). Results are evaluated on a small reference corpus for both raw and revised CG input, with bracketing F-Scores of 87.5{\%} for raw text and 97.1{\%} for revised CG input, and a raw text edge label accuracy of 95.9{\%} for forms and 86{\%} for functions, or 99.7{\%} and 99.4{\%}, respectively, for revised CG. By applying the tools to the CG-only part of the Danish Arboretum treebank we were able to increase the size of the treebank by 86{\%}, from 197.400 to 367.500 words."
}
Markdown (Informal)
[Turning a Dependency Treebank into a PSG-style Constituent Treebank](https://aclanthology.org/L06-1269/) (Bick, LREC 2006)
ACL