@inproceedings{d-etal-2025-seqtns,
title = "{S}eq{TNS}: Sequential Tolerance-based Classifier for Identification of Rhetorical Roles in {I}ndian Legal Documents",
author = "D, Arjun T and
Madasamy, Anand Kumar and
Ramanna, Sheela",
editor = "Inui, Kentaro and
Sakti, Sakriani and
Wang, Haofen and
Wong, Derek F. and
Bhattacharyya, Pushpak and
Banerjee, Biplab and
Ekbal, Asif and
Chakraborty, Tanmoy and
Singh, Dhirendra Pratap",
booktitle = "Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics",
month = dec,
year = "2025",
address = "Mumbai, India",
publisher = "The Asian Federation of Natural Language Processing and The Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingest-ijcnlp-aacl/2025.findings-ijcnlp.50/",
pages = "837--847",
ISBN = "979-8-89176-303-6",
abstract = "Identifying rhetorical roles in legal judgments is a foundational step for automating legal reasoning, summarization, and retrieval. In this paper, we propose a novel Sequential Tolerance-based Classifier (SeqTNS) for rhetorical role classification in Indian legal documents. The proposed classifier leverages semantic similarity and contextual dependencies by using label sequence aware BiLSTMs on top of word embeddings from finetuned InLegalBERT model. These enriched embeddings are clustered into tolerance classes via a tolerance relation using a cosine distance threshold,enabling the model to make flexible, similarity-based predictions. We evaluate SeqTNS on two benchmark datasets annotated with thirteen and seven rhetorical roles, respectively. The proposed method outperforms fine-tuned transformer baselines (LegalBERT, InLegalBERT) as well as the previously developed tolerance relation-based (TNS) model, achieving a weighted F1 score of 0.78 on thirteen class dataset and a macro F1 of 0.83 on the seven class dataset, while reducing training time by 39-40{\%} compared to state of the art BiLSTM-CRF models. The larger of our two datasets is substantial, containing over 40,000 sentences and 1.3M tokens, and serves as a challenging real world benchmark. Additionally, we use LIME for explainability and t-SNE to validate the coherence of tolerance-based clusters."
}
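The abstract's core idea of grouping embeddings into tolerance classes via a cosine-distance threshold can be illustrated with a minimal, generic sketch. This is not the authors' SeqTNS implementation; the function names, the threshold value, the fallback rule, and the majority-vote decision are assumptions made purely for illustration.

```python
# Illustrative sketch only: a generic tolerance-relation classifier over sentence
# embeddings, using a cosine-distance threshold. NOT the authors' SeqTNS code.
import numpy as np

def cosine_distance(a: np.ndarray, b: np.ndarray) -> np.ndarray:
    """Pairwise cosine distance between rows of a and rows of b."""
    a_n = a / np.linalg.norm(a, axis=1, keepdims=True)
    b_n = b / np.linalg.norm(b, axis=1, keepdims=True)
    return 1.0 - a_n @ b_n.T

def predict_by_tolerance(train_emb, train_labels, query_emb, epsilon=0.2):
    """Assign each query the majority label among training points in its
    tolerance class, i.e. points within cosine distance epsilon (assumed rule)."""
    dists = cosine_distance(query_emb, train_emb)       # shape (n_query, n_train)
    preds = []
    for row in dists:
        neighbours = np.where(row <= epsilon)[0]
        if neighbours.size == 0:                        # fallback: nearest point
            neighbours = np.array([row.argmin()])
        labels, counts = np.unique(train_labels[neighbours], return_counts=True)
        preds.append(labels[counts.argmax()])
    return np.array(preds)

# Toy usage with random stand-ins for sentence embeddings
rng = np.random.default_rng(0)
train_emb = rng.normal(size=(100, 768))
train_labels = rng.integers(0, 7, size=100)             # e.g. seven rhetorical roles
query_emb = rng.normal(size=(5, 768))
print(predict_by_tolerance(train_emb, train_labels, query_emb, epsilon=0.4))
```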