@inproceedings{blair-stanek-etal-2024-blt,
    title     = {{BLT}: Can Large Language Models Handle Basic Legal Text?},
    author    = {Blair-Stanek, Andrew and
                 Holzenberger, Nils and
                 Van Durme, Benjamin},
    editor    = {Aletras, Nikolaos and
                 Chalkidis, Ilias and
                 Barrett, Leslie and
                 Goan{\c{t}}{\u{a}}, C{\u{a}}t{\u{a}}lina and
                 Preo{\c{t}}iuc-Pietro, Daniel and
                 Spanakis, Gerasimos},
    booktitle = {Proceedings of the Natural Legal Language Processing Workshop 2024},
    month     = nov,
    year      = {2024},
    address   = {Miami, FL, USA},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2024.nllp-1.18/},
    doi       = {10.18653/v1/2024.nllp-1.18},
    pages     = {216--232},
    abstract  = {We find that the best publicly available LLMs like GPT-4 and Claude currently perform poorly on basic legal text handling. This motivates the creation of a benchmark consisting of examples that lawyers and paralegals would expect LLMs to handle zero-shot, such as looking up the text at a line of a witness deposition or at a subsection of a contract. LLMs' poor performance on this benchmark casts into doubt their reliability as-is for legal practice. However, fine-tuning on our training set brings even a small model to near-perfect performance. This benchmark will be useful for fine-tuning LLMs for downstream legal tasks, as well as for tracking LLMs' reliability as-is for basic legal tasks.},
}
Markdown (Informal)
[BLT: Can Large Language Models Handle Basic Legal Text?](https://aclanthology.org/2024.nllp-1.18/) (Blair-Stanek et al., NLLP 2024)
ACL