% Conference paper from the PROPOR 2026 proceedings (Vol. 1), pages 416--424.
% Field values are kept exactly as exported; only layout and delimiters were normalised.
% NOTE(review): the abstract looks like it lost words after "extracted" and after
%   "than" (possibly markup stripped during export) -- confirm against the paper.
% NOTE(review): the url points at a preview/ingest host -- confirm whether the
%   canonical Anthology URL should be cited instead.
@inproceedings{esashika-etal-2026-discovery,
  title     = {Discovery of Legal Patterns in Civil Petitions via {LLM}-Based Fact Extraction and Density Clustering},
  author    = {Esashika, Rhedson and
               Figueiredo, Carlos M. S. and
               Melo, Tiago de},
  editor    = {Souza, Marlo and
               de-Dios-Flores, Iria and
               Santos, Diana and
               Freitas, Larissa and
               Souza, Jackson Wilke da Cruz and
               Ribeiro, Eug{\'e}nio},
  booktitle = {Proceedings of the 17th International Conference on Computational Processing of {P}ortuguese ({PROPOR} 2026) - Vol. 1},
  month     = apr,
  year      = {2026},
  address   = {Salvador, Brazil},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-dnd/2026.propor-1.41/},
  pages     = {416--424},
  isbn      = {979-8-89176-387-6},
  abstract  = {The analysis of unstructured civil petitions is often hindered by procedural noise and verbose argumentation. To address this, we propose a pipeline composed of LLM-based fact extraction followed by legal-domain embeddings of texts for unsupervised density clustering. We employ Large Language Models to isolate factual narratives from raw texts, which are then encoded using domain-specific representations (Legal-BERT) and grouped via UMAP dimensionality reduction and the HDBSCAN algorithm. Comparative experiments on a Brazilian judicial corpus reveal that clustering based solely on extracted yields significantly more cohesive and semantically well-defined groups than, which suffer from fragmentation due to content variability. Results indicate that the proposed method is a promising approach for thematic organization, procedural triage support, and large-scale discovery of legal patterns.},
}
Markdown (Informal)
[Discovery of Legal Patterns in Civil Petitions via LLM-Based Fact Extraction and Density Clustering](https://preview.aclanthology.org/ingest-dnd/2026.propor-1.41/) (Esashika et al., PROPOR 2026)
ACL