@comment{Conference paper introducing the LegalSim-PT dataset, in the PROPOR 2026 proceedings, pages 392--405. NOTE(review): the url field points at a preview/ingest host of aclanthology.org -- TODO confirm against the final published URL before relying on it.}
@inproceedings{scalercio-etal-2026-legalsim,
  title     = {{LegalSim-PT}: Building a Dataset for Legal Document Simplification in {Portuguese} Leveraging Linguistic Metrics},
  author    = {Scalercio, Arthur and
               Finatto, Maria Jos{\'e} and
               Paes, Aline},
  editor    = {Souza, Marlo and
               de-Dios-Flores, Iria and
               Santos, Diana and
               Freitas, Larissa and
               Souza, Jackson Wilke da Cruz and
               Ribeiro, Eug{\'e}nio},
  booktitle = {Proceedings of the 17th International Conference on Computational Processing of {Portuguese} ({PROPOR} 2026) - Vol. 1},
  month     = apr,
  year      = {2026},
  address   = {Salvador, Brazil},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-dnd/2026.propor-1.39/},
  pages     = {392--405},
  isbn      = {979-8-89176-387-6},
  abstract  = {Document simplification has recently attracted increasing attention due to its broader practical applicability compared to sentence-level simplification. Beyond simplifying individual sentences, this task involves preserving fluency, conciseness, and coherence across the entire text, often incorporating summarization techniques. Despite its importance, research in this area remains largely concentrated on a few languages, particularly English. In this work, we introduce LegalSim-PT, the first large-scale Portuguese dataset for document simplification based on legal texts. To mitigate reliance on manual evaluation, we combined data augmentation strategies with readability, semantic similarity, and diversity metrics to select the most suitable document pairs. We conducted a comprehensive analysis of the resulting dataset, first characterizing its surface features and comparing them with those of existing simplification corpora. Next, we assessed its quality using automatic metrics, linguistic indicators, and human evaluations. Finally, we select representative models as baselines and fine-tune two models on LegalSim-PT, achieving improved performance in document-level simplification.},
}
Markdown (Informal)
[LegalSim-PT: Building a Dataset for Legal Document Simplification in Portuguese Leveraging Linguistic Metrics](https://preview.aclanthology.org/ingest-dnd/2026.propor-1.39/) (Scalercio et al., PROPOR 2026)
ACL