% Workshop paper in the MWE 2025 proceedings (ACL Anthology ID 2025.mwe-1.7).
% The url field holds the permanent Anthology landing page, not a preview-branch link.
@inproceedings{antunes-etal-2025-european,
  author    = {Antunes, David and
               Baptista, Jorge and
               Mamede, Nuno J.},
  title     = {A {European} {Portuguese} corpus annotated for verbal idioms},
  editor    = {Ojha, Atul Kr. and
               Giouli, Voula and
               Mititelu, Verginica Barbu and
               Constant, Mathieu and
               Korvel, Gra{\v{z}}ina and
               Do{\u{g}}ru{\"o}z, A. Seza and
               Rademaker, Alexandre},
  booktitle = {Proceedings of the 21st Workshop on Multiword Expressions (MWE 2025)},
  month     = may,
  year      = {2025},
  address   = {Albuquerque, New Mexico, U.S.A.},
  publisher = {Association for Computational Linguistics},
  pages     = {58--66},
  isbn      = {979-8-89176-243-5},
  url       = {https://aclanthology.org/2025.mwe-1.7/},
  abstract  = {This paper presents the construction of VIDiom-PT, a corpus in European Portuguese annotated for verbal idioms (e.g. O Rui bateu a bota, lit.: Rui hit the boot {\textquoteleft}Rui died{\textquoteright}). This linguistic resource aims to support the development of systems capable of processing such constructions in this language variety. To assist in the annotation effort, two tools were built. The first allows for the detection of possible instances of verbal idioms in texts, while the second provides a graphical interface for annotating them. This effort culminated in the annotation of a total of 5,178 instances of 747 different verbal idioms in more than 200,000 sentences in European Portuguese. A highly reliable inter-annotator agreement was achieved, using Krippendorff{'}s alpha for nominal data (0.869) with 5{\%} of the data independently annotated by 3 experts. Part of the annotated corpus is also made publicly available.},
}
Markdown (Informal)
[A European Portuguese corpus annotated for verbal idioms](https://aclanthology.org/2025.mwe-1.7/) (Antunes et al., MWE 2025)
ACL