@inproceedings{harbecke-etal-2022-micro,
  title     = {Why only {Micro-F1}? Class Weighting of Measures for Relation Classification},
  author    = {Harbecke, David and
               Chen, Yuxuan and
               Hennig, Leonhard and
               Alt, Christoph},
  editor    = {Shavrina, Tatiana and
               Mikhailov, Vladislav and
               Malykh, Valentin and
               Artemova, Ekaterina and
               Serikov, Oleg and
               Protasov, Vitaly},
  booktitle = {Proceedings of {NLP} Power! The First Workshop on Efficient Benchmarking in {NLP}},
  month     = may,
  year      = {2022},
  address   = {Dublin, Ireland},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.nlppower-1.4/},
  doi       = {10.18653/v1/2022.nlppower-1.4},
  pages     = {32--41},
  abstract  = {Relation classification models are conventionally evaluated using only a single measure, e.g., micro-F1, macro-F1 or AUC. In this work, we analyze weighting schemes, such as micro and macro, for imbalanced datasets. We introduce a framework for weighting schemes, where existing schemes are extremes, and two new intermediate schemes. We show that reporting results of different weighting schemes better highlights strengths and weaknesses of a model.},
}
Markdown (Informal)
[Why only Micro-F1? Class Weighting of Measures for Relation Classification](https://aclanthology.org/2022.nlppower-1.4/) (Harbecke et al., nlppower 2022)
ACL