@inproceedings{xu-etal-2020-autoqa,
  title     = {{A}uto{QA}: From Databases To {QA} Semantic Parsers With Only Synthetic Training Data},
  author    = {Xu, Silei and
               Semnani, Sina and
               Campagna, Giovanni and
               Lam, Monica},
  editor    = {Webber, Bonnie and
               Cohn, Trevor and
               He, Yulan and
               Liu, Yang},
  booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing ({EMNLP})},
  month     = nov,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2020.emnlp-main.31/},
  doi       = {10.18653/v1/2020.emnlp-main.31},
  pages     = {422--434},
  abstract  = {We propose AutoQA, a methodology and toolkit to generate semantic parsers that answer questions on databases, with no manual effort. Given a database schema and its data, AutoQA automatically generates a large set of high-quality questions for training that covers different database operations. It uses automatic paraphrasing combined with template-based parsing to find alternative expressions of an attribute in different parts of speech. It also uses a novel filtered auto-paraphraser to generate correct paraphrases of entire sentences. We apply AutoQA to the Schema2QA dataset and obtain an average logical form accuracy of 62.9{\%} when tested on natural questions, which is only 6.4{\%} lower than a model trained with expert natural language annotations and paraphrase data collected from crowdworkers. To demonstrate the generality of AutoQA, we also apply it to the Overnight dataset. AutoQA achieves 69.8{\%} answer accuracy, 16.4{\%} higher than the state-of-the-art zero-shot models and only 5.2{\%} lower than the same model trained with human data.},
}
Markdown (Informal)
[AutoQA: From Databases To QA Semantic Parsers With Only Synthetic Training Data](https://aclanthology.org/2020.emnlp-main.31/) (Xu et al., EMNLP 2020)
ACL