% LREC 2020 paper presenting Odinson, a rule-based information extraction framework.
% The url field uses the canonical ACL Anthology address, not a preview-branch build.
@inproceedings{valenzuela-escarcega-etal-2020-odinson,
  title     = {{Odinson}: A Fast Rule-based Information Extraction Framework},
  author    = {Valenzuela-Esc{\'a}rcega, Marco A. and
               Hahn-Powell, Gus and
               Bell, Dane},
  editor    = {Calzolari, Nicoletta and
               B{\'e}chet, Fr{\'e}d{\'e}ric and
               Blache, Philippe and
               Choukri, Khalid and
               Cieri, Christopher and
               Declerck, Thierry and
               Goggi, Sara and
               Isahara, Hitoshi and
               Maegaard, Bente and
               Mariani, Joseph and
               Mazo, H{\'e}l{\`e}ne and
               Moreno, Asuncion and
               Odijk, Jan and
               Piperidis, Stelios},
  booktitle = {Proceedings of the Twelfth Language Resources and Evaluation Conference},
  month     = may,
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  url       = {https://aclanthology.org/2020.lrec-1.267/},
  pages     = {2183--2191},
  language  = {eng},
  isbn      = {979-10-95546-34-4},
  abstract  = {We present Odinson, a rule-based information extraction framework, which couples a simple yet powerful pattern language that can operate over multiple representations of text, with a runtime system that operates in near real time. In the Odinson query language, a single pattern may combine regular expressions over surface tokens with regular expressions over graphs such as syntactic dependencies. To guarantee the rapid matching of these patterns, our framework indexes most of the necessary information for matching patterns, including directed graphs such as syntactic dependencies, into a custom Lucene index. Indexing minimizes the amount of expensive pattern matching that must take place at runtime. As a result, the runtime system matches a syntax-based graph traversal in 2.8 seconds in a corpus of over 134 million sentences, nearly 150,000 times faster than its predecessor.},
}
Markdown (Informal)
[Odinson: A Fast Rule-based Information Extraction Framework](https://aclanthology.org/2020.lrec-1.267/) (Valenzuela-Escárcega et al., LREC 2020)
ACL