% ACL Anthology record 2021.wanlp-1.15 (Mubarak and Hassan, WANLP 2021).
% The url field uses the permanent aclanthology.org address rather than a
% branch-specific preview deployment, and case-sensitive title words are
% protected as whole words so that sentence-casing styles keep their capitals.
@inproceedings{mubarak-hassan-2021-ul2c,
    title = "{UL2C}: Mapping User Locations to Countries on {Arabic} {Twitter}",
    author = "Mubarak, Hamdy and
      Hassan, Sabit",
    editor = "Habash, Nizar and
      Bouamor, Houda and
      Hajj, Hazem and
      Magdy, Walid and
      Zaghouani, Wajdi and
      Bougares, Fethi and
      Tomeh, Nadi and
      Abu Farha, Ibrahim and
      Touileb, Samia",
    booktitle = "Proceedings of the Sixth Arabic Natural Language Processing Workshop",
    month = apr,
    year = "2021",
    address = "Kyiv, Ukraine (Virtual)",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2021.wanlp-1.15/",
    pages = "145--153",
    abstract = "Mapping user locations to countries can be useful for many applications such as dialect identification, author profiling, recommendation system, etc. Twitter allows users to declare their locations as free text, and these user-declared locations are often noisy and hard to decipher automatically. In this paper, we present the largest manually labeled dataset for mapping user locations on Arabic Twitter to their corresponding countries. We build effective machine learning models that can automate this mapping with significantly better efficiency compared to libraries such as geopy. We also show that our dataset is more effective than data extracted from GeoNames geographical database in this task as the latter covers only locations written in formal ways."
}
Markdown (Informal)
[UL2C: Mapping User Locations to Countries on Arabic Twitter](https://aclanthology.org/2021.wanlp-1.15/) (Mubarak & Hassan, WANLP 2021)
ACL