% Conference paper by Banski and Moszczynski in the LREC 2008 proceedings.
% Proper nouns and acronyms are brace-protected as whole words so that styles
% which re-case titles keep their capitalisation.
@inproceedings{banski-moszczynski-2008-enhancing,
  title = "Enhancing an {English}-{Polish} Electronic Dictionary for Multiword Expression Research",
  author = "Ba{\'n}ski, Piotr and
    Moszczy{\'n}ski, Rados{\l}aw",
  editor = "Calzolari, Nicoletta and
    Choukri, Khalid and
    Maegaard, Bente and
    Mariani, Joseph and
    Odijk, Jan and
    Piperidis, Stelios and
    Tapias, Daniel",
  booktitle = "Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)",
  month = may,
  year = "2008",
  address = "Marrakech, Morocco",
  publisher = "European Language Resources Association (ELRA)",
  url = "https://aclanthology.org/L08-1440/",
  abstract = "This paper describes a project aimed at converting a legacy representation of English idioms into an XML-based format. The project is set in the context of a large electronic English-Polish dictionary which contains several hundred formalized idiom descriptions and which has been released under the terms of a free license. In short, the project consists of three phases: cleaning up the dictionary markup, extracting the legacy idiom representations, and converting them into TEI P5 XML constrained by a RelaxNG grammar created for this purpose and constituting a module that can be included as part of the TEI P5 schema. The paper contains general descriptions of the individual phases and several examples of XML-encoded idioms. It also suggests some directions for further research, which include abstracting the XML-ized idiom representations into general syntactic patterns and using the representations to automatically identify idioms in tagged corpora."
}
Markdown (Informal)
[Enhancing an English-Polish Electronic Dictionary for Multiword Expression Research](https://aclanthology.org/L08-1440/) (Bański & Moszczyński, LREC 2008)
ACL