@comment{
  Conference paper in the PROPOR 2026 proceedings (Vol. 1).
  The editor surname de-Dios-Flores is wrapped in braces on purpose: classic
  BibTeX splits name tokens on hyphens and would otherwise read the leading
  lowercase "de" as a von particle, detaching it from the rest of the surname.
}
@inproceedings{simoes-etal-2026-field,
  title     = {Field of Science and Technology Classification of Academic Documents in {Portuguese}},
  author    = {Sim{\~o}es, Ivo and
               Oliveira, Hugo Gon{\c{c}}alo and
               Correia, Jo{\~a}o},
  editor    = {Souza, Marlo and
               {de-Dios-Flores}, Iria and
               Santos, Diana and
               Freitas, Larissa and
               Souza, Jackson Wilke da Cruz and
               Ribeiro, Eug{\'e}nio},
  booktitle = {Proceedings of the 17th International Conference on Computational Processing of {Portuguese} ({PROPOR} 2026) - Vol. 1},
  month     = apr,
  year      = {2026},
  address   = {Salvador, Brazil},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-dnd/2026.propor-1.104/},
  pages     = {1021--1026},
  isbn      = {979-8-89176-387-6},
  abstract  = {Towards improving metadata in academic repositories, this study evaluates the efficacy of different transformer-based models in the automatic classification of the Field of Science and Technology (FOS) of academic theses written in Portuguese. We compare the performance of four different encoder models, two multilingual and two Portuguese-specific, against five larger decoder-based LLMs, on a dataset of 9,696 theses characterized by their title, keywords, and abstract. Fine-tuned encoder-based models achieved the best scores (F1 = 88{\%}), outperforming general-purpose decoder models prompted for the task. These results suggest that, for localized academic domains, task-specific fine-tuning remains more effective than general-purpose LLM prompting.},
}
Markdown (Informal)
[Field of Science and Technology Classification of Academic Documents in Portuguese](https://preview.aclanthology.org/ingest-dnd/2026.propor-1.104/) (Simões et al., PROPOR 2026)
ACL