% ACL Anthology paper 2025.nakbanlp-1.13 (workshop paper, pages 122--126).
% Case-sensitive words in the title are brace-protected as whole words so
% sentence-casing styles keep them intact.
@inproceedings{bilgin-tasdemir-ozates-2025-nakbatr,
  title     = {{NakbaTR}: A {Turkish} {NER} Dataset for {Nakba} Narratives},
  author    = {Bilgin Tasdemir, Esma Fat{\i}ma and
               {\"O}zate{\c{s}}, {\c{S}}aziye Bet{\"u}l},
  editor    = {Jarrar, Mustafa and
               Habash, Nizar and
               El-Haj, Mo},
  booktitle = {Proceedings of the first International Workshop on Nakba Narratives as Language Resources},
  month     = jan,
  year      = {2025},
  address   = {Abu Dhabi},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.nakbanlp-1.13/},
  pages     = {122--126},
  abstract  = {This paper introduces a novel, annotated Named Entity Recognition (NER) dataset derived from a collection of 181 news articles about the Nakba and its witnesses. Given their prominence as a primary source of information on the Nakba in Turkish, news articles were selected as the primary data source. Some 4,032 news sentences are collected from web sites of two news agencies, Anadolu Ajans{\i} and TRTHaber. We applied a filtering process to make sure that only the news which contain witness testimonies regarding the ongoing Nakba are included in the dataset. After a semi-automatic annotation for entities of type Person, Location, and Organization, we obtained a NER dataset of 2,289 PERSON, 5,875 LOCATION, and 1,299 ORGANIZATION tags. We expect the dataset to be useful in several NLP tasks such as sentiment analysis and relation extraction for Nakba event while providing a new language resource for Turkish. As a future work, we aim to improve the dataset by increasing the number of news and entity types.},
}
Markdown (Informal)
[NakbaTR: A Turkish NER Dataset for Nakba Narratives](https://aclanthology.org/2025.nakbanlp-1.13/) (Bilgin Tasdemir & Özateş, NakbaNLP 2025)
ACL
- Esma Fatıma Bilgin Tasdemir and Şaziye Betül Özateş. 2025. NakbaTR: A Turkish NER Dataset for Nakba Narratives. In Proceedings of the first International Workshop on Nakba Narratives as Language Resources, pages 122–126, Abu Dhabi. Association for Computational Linguistics.