% Workshop paper; ACL Anthology ID 2022.insights-1.3 (matches the doi suffix).
% The url field is the canonical Anthology landing page for that ID.
@inproceedings{khosla-gangadharaiah-2022-evaluating,
  title     = {Evaluating the Practical Utility of Confidence-score based Techniques for Unsupervised Open-world Classification},
  author    = {Khosla, Sopan and
               Gangadharaiah, Rashmi},
  editor    = {Tafreshi, Shabnam and
               Sedoc, Jo{\~a}o and
               Rogers, Anna and
               Drozd, Aleksandr and
               Rumshisky, Anna and
               Akula, Arjun},
  booktitle = {Proceedings of the Third Workshop on Insights from Negative Results in {NLP}},
  month     = may,
  year      = {2022},
  address   = {Dublin, Ireland},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.insights-1.3/},
  doi       = {10.18653/v1/2022.insights-1.3},
  pages     = {18--23},
  abstract  = {Open-world classification in dialog systems require models to detect open intents, while ensuring the quality of in-domain (ID) intent classification. In this work, we revisit methods that leverage distance-based statistics for unsupervised out-of-domain (OOD) detection. We show that despite their superior performance on threshold-independent metrics like AUROC on test-set, threshold values chosen based on the performance on a validation-set do not generalize well to the test-set, thus resulting in substantially lower performance on ID or OOD detection accuracy and F1-scores. Our analysis shows that this lack of generalizability can be successfully mitigated by setting aside a hold-out set from validation data for threshold selection (sometimes achieving relative gains as high as 100{\%}). Extensive experiments on seven benchmark datasets show that this fix puts the performance of these methods at par with, or sometimes even better than, the current state-of-the-art OOD detection techniques.},
}
Markdown (Informal)
[Evaluating the Practical Utility of Confidence-score based Techniques for Unsupervised Open-world Classification](https://preview.aclanthology.org/jlcl-multiple-ingestion/2022.insights-1.3/) (Khosla & Gangadharaiah, insights 2022)
ACL