% Workshop paper in the RESOURCEFUL-2025 proceedings (ACL Anthology record).
% The url field uses the canonical Anthology address, not a preview build.
@inproceedings{ilinykh-szawerna-2025-need,
  title     = {``{I} Need More Context and an {English} Translation'': Analysing How {LLM}s Identify Personal Information in {Komi}, {Polish}, and {English}},
  author    = {Ilinykh, Nikolai and
               Szawerna, Maria Irena},
  editor    = {Holdt, {\v{S}}pela Arhar and
               Ilinykh, Nikolai and
               Scalvini, Barbara and
               Bruton, Micaella and
               Debess, Iben Nyholm and
               Tudor, Crina Madalina},
  booktitle = {Proceedings of the Third Workshop on Resources and Representations for Under-Resourced Languages and Domains ({RESOURCEFUL}-2025)},
  month     = mar,
  year      = {2025},
  address   = {Tallinn, Estonia},
  publisher = {University of Tartu Library, Estonia},
  url       = {https://aclanthology.org/2025.resourceful-1.32/},
  pages     = {165--178},
  isbn      = {978-9908-53-121-2},
  abstract  = {Automatic identification of personal information (PI) is particularly difficult for languages with limited linguistic resources. Recently, large language models (LLMs) have been applied to various tasks involving low-resourced languages, but their capability to process PI in such contexts remains under-explored. In this paper we provide a qualitative analysis of the outputs from three LLMs prompted to identify PI in texts written in Komi (Permyak and Zyrian), Polish, and English. Our analysis highlights challenges in using pre-trained LLMs for PI identification in both low- and medium-resourced languages. It also motivates the need to develop LLMs that understand the differences in how PI is expressed across languages with varying levels of availability of linguistic resources.},
}
Markdown (Informal)
[“I Need More Context and an English Translation”: Analysing How LLMs Identify Personal Information in Komi, Polish, and English](https://aclanthology.org/2025.resourceful-1.32/) (Ilinykh & Szawerna, RESOURCEFUL 2025)
ACL