% ALTA 2018 workshop paper; ACL Anthology ID U18-1006.
@inproceedings{holt-chisholm-2018-extracting,
    title = "Extracting structured data from invoices",
    author = "Holt, Xavier and
      Chisholm, Andrew",
    editor = "Kim, Sunghwan Mac and
      Zhang, Xiuzhen (Jenny)",
    booktitle = "Proceedings of the Australasian Language Technology Association Workshop 2018",
    month = dec,
    year = "2018",
    address = "Dunedin, New Zealand",
    url = "https://aclanthology.org/U18-1006/",
    pages = "53--59",
    abstract = "Business documents encode a wealth of information in a format tailored to human consumption {--} i.e. aesthetically disbursed natural language text, graphics and tables. We address the task of extracting key fields (e.g. the amount due on an invoice) from a wide-variety of potentially unseen document formats. In contrast to traditional template driven extraction systems, we introduce a content-driven machine-learning approach which is both robust to noise and generalises to unseen document formats. In a comparison of our approach with alternative invoice extraction systems, we observe an absolute accuracy gain of 20{\%} across compared fields, and a 25{\%}{--}94{\%} reduction in extraction latency."
}
Markdown (Informal)
[Extracting structured data from invoices](https://aclanthology.org/U18-1006/) (Holt & Chisholm, ALTA 2018)
ACL