@comment{Conference paper record (ACL Anthology ID 2024.lrec-main.1354). The url field holds the canonical Anthology landing page, not a branch preview build.}
@inproceedings{kowieski-etal-2024-tapasgo,
  title     = {{TAPASGO}: Transfer Learning towards a {German}-Language Tabular Question Answering Model},
  author    = {Kowieski, Dominik Andreas and
               Hellwig, Michael and
               Feilhauer, Thomas},
  editor    = {Calzolari, Nicoletta and
               Kan, Min-Yen and
               Hoste, Veronique and
               Lenci, Alessandro and
               Sakti, Sakriani and
               Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  url       = {https://aclanthology.org/2024.lrec-main.1354/},
  pages     = {15579--15584},
  abstract  = {Processing tabular data holds significant importance across various domains and applications. This study investigates the performance and limitations of fine-tuned models for tabular data analysis, specifically focusing on using fine-tuning mechanics on an English model towards a potential German model. The validation of the effectiveness of the transfer learning approach compares the performance of the fine-tuned German model and of the original English model on test data from the German training set. A potential shortcut that translates the German test data into English serves for comparison. Results reveal that the fine-tuned model outperforms the original model significantly, demonstrating the effectiveness of transfer learning even for a limited amount of training data. One also observes that the English model can effectively process translated German tabular data, albeit with a slight accuracy drop compared to fine-tuning. The model evaluation extends to real-world data extracted from the sustainability reports of a financial institution. The fine-tuned model proves superior in extracting knowledge from these training-unrelated tables, indicating its potential applicability in practical scenarios. This paper also releases the first manually annotated dataset for German Table Question Answering and the related annotation tool.},
}
Markdown (Informal)
[TAPASGO: Transfer Learning towards a German-Language Tabular Question Answering Model](https://aclanthology.org/2024.lrec-main.1354/) (Kowieski et al., LREC-COLING 2024)
ACL