% Conference paper, ACL Anthology ID 2024.nlpaics-1.20.
% Acronyms and product names in the title are braced as whole words so that
% styles which sentence-case titles keep their capitalisation.
% The url field uses the canonical Anthology address, not a preview build.
@inproceedings{tsang-etal-2024-deciphering,
    title = "Deciphering Cyber Threats: A Unifying Framework with {GPT}-3.5, {BERTopic} and Feature Importance",
    author = "Tsang, Chun Man and
      Bell, Tom and
      Gouglidis, Antonios and
      El-Haj, Mo",
    editor = "Mitkov, Ruslan and
      Ezzini, Saad and
      Ranasinghe, Tharindu and
      Ezeani, Ignatius and
      Khallaf, Nouran and
      Acarturk, Cengiz and
      Bradbury, Matthew and
      El-Haj, Mo and
      Rayson, Paul",
    booktitle = "Proceedings of the First International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security",
    month = jul,
    year = "2024",
    address = "Lancaster, UK",
    publisher = "International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security",
    url = "https://aclanthology.org/2024.nlpaics-1.20/",
    pages = "175--185",
    abstract = "This paper presents a methodology for the categorisation and attribute quantification of cyber threats. The data was sourced from Common Weakness Enumeration (CWE) entries, encompassing 503 hardware and software vulnerabilities. For each entry, GPT-3.5 generated detailed descriptions for 12 key threat attributes. Employing BERTopic for topic modelling, our research focuses on clustering cyber threats and evaluates the efficacy of various dimensionality reduction and clustering algorithms, notably finding that UMAP combined with HDBSCAN, optimised through parameterisation, outperforms other configurations. The study further explores feature importance analysis by converting topic modelling results into a classification paradigm, achieving classification accuracies between 60{\%} and 80{\%} with algorithms such as Random Forest, XGBoost, and Linear SVM. This feature importance analysis quantifies the significance of each threat attribute, with SHAP identified as the most effective method for this calculation."
}
Markdown (Informal)
[Deciphering Cyber Threats: A Unifying Framework with GPT-3.5, BERTopic and Feature Importance](https://aclanthology.org/2024.nlpaics-1.20/) (Tsang et al., NLPAICS 2024)
ACL