% ACL Anthology record 2020.fnp-1.27: FNP 2020 workshop paper, pages 158--162.
% Editor names are stored without honorifics so styles can parse, sort and
% abbreviate them correctly; url is the canonical (non-preview) Anthology page.
@inproceedings{hercig-kral-2020-uwb,
  title     = {{UWB@FinTOC-2020} Shared Task: Financial Document Title Detection},
  author    = {Hercig, Tom{\'a}{\v{s}} and
               Kral, Pavel},
  editor    = {El-Haj, Mahmoud and
               Athanasakou, Vasiliki and
               Ferradans, Sira and
               Salzedo, Catherine and
               Elhag, Ans and
               Bouamor, Houda and
               Litvak, Marina and
               Rayson, Paul and
               Giannakopoulos, George and
               Pittaras, Nikiforos},
  booktitle = {Proceedings of the 1st Joint Workshop on Financial Narrative Processing and MultiLing Financial Summarisation},
  month     = dec,
  year      = {2020},
  address   = {Barcelona, Spain (Online)},
  publisher = {COLING},
  url       = {https://aclanthology.org/2020.fnp-1.27/},
  pages     = {158--162},
  abstract  = {This paper describes our system created for the Financial Document Structure Extraction Shared Task (FinTOC-2020): Title Detection. We rely on the Apache PDFBox library to extract text and all additional information e.g. font type and font size from the financial prospectuses. Our constrained system uses only the provided training data without any additional external resources. Our system is based on the Maximum Entropy classifier and various features including font type and font size. Our system achieves F1 score 81{\%} and {\#}1 place in the French track and F1 score 77{\%} and {\#}2 place among 5 participating teams in the English track.},
}
Markdown (Informal)
[UWB@FinTOC-2020 Shared Task: Financial Document Title Detection](https://preview.aclanthology.org/jlcl-multiple-ingestion/2020.fnp-1.27/) (Hercig & Kral, FNP 2020)
ACL