@inproceedings{hua-etal-2023-mixed,
title = "Mixed-domain Language Modeling for Processing Long Legal Documents",
author = "Hua, Wenyue and
Zhang, Yuchen and
Chen, Zhe and
Li, Josie and
Weber, Melanie",
editor = "Preoțiuc-Pietro, Daniel and
Goanta, Catalina and
Chalkidis, Ilias and
Barrett, Leslie and
Spanakis, Gerasimos and
Aletras, Nikolaos",
booktitle = "Proceedings of the Natural Legal Language Processing Workshop 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2023.nllp-1.7/",
doi = "10.18653/v1/2023.nllp-1.7",
pages = "51--61",
abstract = "The application of Natural Language Processing (NLP) to specialized domains, such as the law, has recently received a surge of interest. As many legal services rely on processing and analyzing large collections of documents, automating such tasks with NLP tools such as language models emerges as a key challenge since legal documents may contain specialized vocabulary from other domains, such as medical terminology in personal injury text. However, most language models are general-purpose models, which either have limited reasoning capabilities on highly specialized legal terminology and syntax, such as BERT or ROBERTA, or are expensive to run and tune, such as GPT-3.5 and Claude. Thus, in this paper, we propose a specialized language model for personal injury text, LEGALRELECTRA, which is trained on mixed-domain legal and medical corpora. We show that as a small language model, our model improves over general-domain and single-domain medical and legal language models when processing mixed-domain (personal injury) text. Our training architecture implements the ELECTRA framework but utilizes REFORMER instead of BERT for its generator and discriminator. We show that this improves the model{'}s performance on processing long passages and results in better long-range text comprehension."
}
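
The training setup named in the abstract, the ELECTRA replaced-token-detection objective with Reformer in place of BERT for the generator and discriminator, can be illustrated with a toy sketch. The PyTorch snippet below is a hedged reconstruction, not the authors' code: it uses a small vanilla Transformer encoder as a stand-in for Reformer, and every module, variable, and hyperparameter here (TinyLM, mask_id, the 15% masking rate) is illustrative rather than taken from the paper.

import torch
import torch.nn as nn

class TinyLM(nn.Module):
    """Stand-in backbone for the generator/discriminator (the paper uses Reformer)."""
    def __init__(self, vocab_size, d_model=64, out_dim=None):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, d_model)
        layer = nn.TransformerEncoderLayer(d_model, nhead=4, batch_first=True)
        self.encoder = nn.TransformerEncoder(layer, num_layers=2)
        self.head = nn.Linear(d_model, out_dim or vocab_size)

    def forward(self, ids):
        return self.head(self.encoder(self.embed(ids)))

vocab_size, mask_id = 1000, 0
generator = TinyLM(vocab_size)                    # fills in masked tokens
discriminator = TinyLM(vocab_size, out_dim=1)     # flags replaced tokens

ids = torch.randint(1, vocab_size, (2, 16))       # toy batch of token ids
mask = torch.rand(ids.shape) < 0.15               # MLM-style 15% masking

# Generator: predict the original tokens at masked positions.
gen_logits = generator(ids.masked_fill(mask, mask_id))
gen_loss = nn.functional.cross_entropy(gen_logits[mask], ids[mask])

# Corrupt the input: masked positions receive tokens sampled from the generator.
with torch.no_grad():
    sampled = torch.distributions.Categorical(logits=gen_logits).sample()
corrupted = torch.where(mask, sampled, ids)

# Discriminator: classify each token as original (0) vs. replaced (1).
is_replaced = (corrupted != ids).float()
disc_logits = discriminator(corrupted).squeeze(-1)
disc_loss = nn.functional.binary_cross_entropy_with_logits(disc_logits, is_replaced)

loss = gen_loss + 50.0 * disc_loss                # ELECTRA weights the detection loss

The long-passage gains the abstract reports come from swapping this backbone for Reformer, whose locality-sensitive-hashing attention scales better than full self-attention on long sequences; the objective itself is unchanged.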