@comment{NOTE(review): the url field of the entry below points at the
preview.aclanthology.org host under the jlcl-multiple-ingestion path, which
looks like a staging build rather than a permanent address. Confirm, and
switch to the canonical Anthology permalink if one exists.}
@inproceedings{stoyanova-2014-automatic,
  author    = {Stoyanova, Ivelina},
  title     = {Automatic Categorisation of Multiword Expressions and Named Entities in {Bulgarian}},
  booktitle = {Proceedings of the First International Conference on Computational Linguistics in {Bulgaria} ({CLIB} 2014)},
  month     = sep,
  year      = {2014},
  address   = {Sofia, Bulgaria},
  publisher = {Department of Computational Linguistics, Institute for Bulgarian Language, Bulgarian Academy of Sciences},
  pages     = {40--48},
  url       = {https://preview.aclanthology.org/jlcl-multiple-ingestion/2014.clib-1.6/},
  abstract  = {This paper describes an approach for automatic categorisation of various types of multiword expressions (MWEs) with a focus on multiword named entities (MNEs), which compose a large portion of MWEs in general. The proposed algorithm is based on a refined classification of MWEs according to their idiomaticity. While MWE categorisation can be considered as a separate and independent task, it complements the general task of MWE recognition. After outlining the method, we set up an experiment to demonstrate its performance. We use the corpus Wiki1000+ that comprises 6,311 annotated Wikipedia articles of 1,000 or more words each, amounting to 13.4 million words in total. The study also employs a large dictionary of 59,369 MWEs noun phrases (out of more than 85,000 MWEs), labelled with their respective types. The dictionary is compiled automatically and verified semi-automatically. The research presented here is based on Bulgarian although most of the ideas, the methodology and the analysis are applicable to other Slavic and possibly other European languages.},
}
Markdown (Informal)
[Automatic Categorisation of Multiword Expressions and Named Entities in Bulgarian](https://preview.aclanthology.org/jlcl-multiple-ingestion/2014.clib-1.6/) (Stoyanova, CLIB 2014)
ACL