% Iwatsuki and Aizawa, EACL 2021 (main volume), ACL Anthology ID 2021.eacl-main.304.
% The url field uses the canonical Anthology address, not a temporary preview-branch build.
@inproceedings{iwatsuki-aizawa-2021-communicative,
  title     = {Communicative-Function-Based Sentence Classification for Construction of an Academic Formulaic Expression Database},
  author    = {Iwatsuki, Kenichi and
               Aizawa, Akiko},
  editor    = {Merlo, Paola and
               Tiedemann, Jorg and
               Tsarfaty, Reut},
  booktitle = {Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume},
  month     = apr,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.eacl-main.304/},
  doi       = {10.18653/v1/2021.eacl-main.304},
  pages     = {3476--3497},
  abstract  = {Formulaic expressions (FEs), such as {\textquoteleft}in this paper, we propose' are frequently used in scientific papers. FEs convey a communicative function (CF), i.e. {\textquoteleft}showing the aim of the paper' in the above-mentioned example. Although CF-labelled FEs are helpful in assisting academic writing, the construction of FE databases requires manual labour for assigning CF labels. In this study, we considered a fully automated construction of a CF-labelled FE database using the top{--}down approach, in which the CF labels are first assigned to sentences, and then the FEs are extracted. For the CF-label assignment, we created a CF-labelled sentence dataset, on which we trained a SciBERT classifier. We show that the classifier and dataset can be used to construct FE databases of disciplines that are different from the training data. The accuracy of in-disciplinary classification was more than 80{\%}, while cross-disciplinary classification also worked well. We also propose an FE extraction method, which was applied to the CF-labelled sentences. Finally, we constructed and published a new, large CF-labelled FE database. The evaluation of the final CF-labelled FE database showed that approximately 65{\%} of the FEs are correct and useful, which is sufficiently high considering practical use.},
}
Markdown (Informal)
[Communicative-Function-Based Sentence Classification for Construction of an Academic Formulaic Expression Database](https://aclanthology.org/2021.eacl-main.304/) (Iwatsuki & Aizawa, EACL 2021)
ACL