@inproceedings{muller-etal-2024-role,
title = "The Role of Adverbs in Language Variety Identification: The Case of {P}ortuguese Multi-Word Adverbs",
author = {M{\"u}ller, Izabela and
Mamede, Nuno and
Baptista, Jorge},
editor = {Scherrer, Yves and
Jauhiainen, Tommi and
Ljube{\v{s}}i{\'c}, Nikola and
Zampieri, Marcos and
Nakov, Preslav and
Tiedemann, J{\"o}rg},
booktitle = "Proceedings of the Eleventh Workshop on NLP for Similar Languages, Varieties, and Dialects (VarDial 2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/add-emnlp-2024-awards/2024.vardial-1.8/",
doi = "10.18653/v1/2024.vardial-1.8",
pages = "99--106",
abstract = "This paper aims to assess the role of multiword compound adverbs in distinguishing Brazilian Portuguese (PT-BR) from European Portuguese (PT-PT). Two key factors underpin this focus: Firstly, multiword expressions often provide less ambiguity compared to single words, even when their meaning is idiomatic (non-compositional). Secondly, despite constituting a significant portion of lexicons in many languages, they are frequently overlooked in Natural Language Processing, possibly due to their heterogeneous nature and lexical range.For this study, a large lexicon of Portuguese multiword adverbs (3,665) annotated with diatopic information regarding language variety was utilized. The paper investigates the distribution of this category in a corpus consisting in excerpts from journalistic texts sourced from the DSL (Dialect and Similar Language) corpus, representing Brazilian (PT-BR) and European Portuguese (PT-PT), respectively, each partition containing 18,000 sentences.Results indicate a substantial similarity between the two varieties, with a considerable overlap in the lexicon of multiword adverbs. Additionally, specific adverbs unique to each language variety were identified. Lexical entries recognized in the corpus represent 18.2{\%} (PT-BR) to 19.5{\%} (PT-PT) of the lexicon, and approximately 5,700 matches in each partition. While many of the matches are spurious due to ambiguity with otherwise non-idiomatic, free strings, occurrences of adverbs marked as exclusive to one variety in texts from the other variety are rare."
}
Markdown (Informal)
[The Role of Adverbs in Language Variety Identification: The Case of Portuguese Multi-Word Adverbs](https://preview.aclanthology.org/add-emnlp-2024-awards/2024.vardial-1.8/) (Müller et al., VarDial 2024)
ACL