% LREC 2016 paper, ACL Anthology ID L16-1174. The url field uses the canonical Anthology host, not a preview build.
@inproceedings{trips-2016-syntactic,
  title     = {Syntactic Analysis of Phrasal Compounds in Corpora: a Challenge for {NLP} Tools},
  author    = {Trips, Carola},
  editor    = {Calzolari, Nicoletta and
               Choukri, Khalid and
               Declerck, Thierry and
               Goggi, Sara and
               Grobelnik, Marko and
               Maegaard, Bente and
               Mariani, Joseph and
               Mazo, Helene and
               Moreno, Asuncion and
               Odijk, Jan and
               Piperidis, Stelios},
  booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)},
  month     = may,
  year      = {2016},
  address   = {Portoro{\v{z}}, Slovenia},
  publisher = {European Language Resources Association (ELRA)},
  url       = {https://aclanthology.org/L16-1174/},
  pages     = {1092--1097},
  abstract  = {The paper introduces a ``train once, use many'' approach for the syntactic analysis of phrasal compounds (PC) of the type XP+N like ``Would you like to sit on my knee?'' nonsense. PCs are a challenge for NLP tools since they require the identification of a syntactic phrase within a morphological complex. We propose a method which uses a state-of-the-art dependency parser not only to analyse sentences (the environment of PCs) but also to compound the non-head of PCs in a well-defined particular condition which is the analysis of the non-head spanning from the left boundary (mostly marked by a determiner) to the nominal head of the PC. This method contains the following steps: (a) the use an English state-of-the-art dependency parser with data comprising sentences with PCs from the British National Corpus (BNC), (b) the detection of parsing errors of PCs, (c) the separate treatment of the non-head structure using the same model, and (d) the attachment of the non-head to the compound head. The evaluation of the method showed that the accuracy of 76{\%} could be improved by adding a step in the PC compounder module which specified user-defined contexts being sensitive to the part of speech of the non-head parts and by using TreeTagger, in line with our approach.},
}
Markdown (Informal)
[Syntactic Analysis of Phrasal Compounds in Corpora: a Challenge for NLP Tools](https://aclanthology.org/L16-1174/) (Trips, LREC 2016)
ACL