% PICARD paper (Scholak et al., EMNLP 2021), ACL Anthology ID 2021.emnlp-main.779.
% The url field uses the canonical Anthology address rather than a preview-branch mirror.
@inproceedings{scholak-etal-2021-picard,
  title     = {{PICARD}: Parsing Incrementally for Constrained Auto-Regressive Decoding from Language Models},
  author    = {Scholak, Torsten and
               Schucher, Nathan and
               Bahdanau, Dzmitry},
  editor    = {Moens, Marie-Francine and
               Huang, Xuanjing and
               Specia, Lucia and
               Yih, Scott Wen-tau},
  booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2021},
  address   = {Online and Punta Cana, Dominican Republic},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.emnlp-main.779/},
  doi       = {10.18653/v1/2021.emnlp-main.779},
  pages     = {9895--9901},
  abstract  = {Large pre-trained language models for textual data have an unconstrained output space; at each decoding step, they can produce any of 10,000s of sub-word tokens. When fine-tuned to target constrained formal languages like SQL, these models often generate invalid code, rendering it unusable. We propose PICARD (code available at \url{https://github.com/ElementAI/picard}), a method for constraining auto-regressive decoders of language models through incremental parsing. PICARD helps to find valid output sequences by rejecting inadmissible tokens at each decoding step. On the challenging Spider and CoSQL text-to-SQL translation tasks, we show that PICARD transforms fine-tuned T5 models with passable performance into state-of-the-art solutions.},
}
Markdown (Informal)
[PICARD: Parsing Incrementally for Constrained Auto-Regressive Decoding from Language Models](https://aclanthology.org/2021.emnlp-main.779/) (Scholak et al., EMNLP 2021)
ACL