% System report for CCL23-Eval Task 6 (telecom network fraud case classification),
% published in the CCL 2023 evaluations volume; ACL Anthology ID 2023.ccl-3.23.
% The abstract's whitespace was repaired by hand after PDF extraction had fused
% words across line breaks and left end-of-line hyphens inside words.
@inproceedings{zheng-etal-2023-system,
    title = "System Report for {CCL23-Eval} Task 6: A Method For Telecom Network Fraud Case Classification Based on Two-stage Training Framework and Within-task Pretraining",
    author = "Zheng, Guangyu and
      He, Tingting and
      Wang, Zhenyu and
      Wang, Haochang",
    editor = "Sun, Maosong and
      Qin, Bing and
      Qiu, Xipeng and
      Jiang, Jing and
      Han, Xianpei",
    booktitle = "Proceedings of the 22nd Chinese National Conference on Computational Linguistics (Volume 3: Evaluations)",
    month = aug,
    year = "2023",
    address = "Harbin, China",
    publisher = "Chinese Information Processing Society of China",
    url = "https://aclanthology.org/2023.ccl-3.23/",
    pages = "206--212",
    language = "eng",
    abstract = "{\textquotedblleft}Domain-specific text classification often needs more external knowledge, and fraud cases have fewer descriptions. Existing methods usually utilize single-stage deep models to extract semantic features, which is less reusable. To tackle this issue, we propose a two-stage training framework based on within-task pretraining and multi-dimensional semantic enhancement for CCL23-Eval Task 6 (Telecom Network Fraud Case Classification, FCC). Our training framework is divided into two stages. First, we pre-train using the training corpus to obtain specific BERT. The semantic mining ability of the model is enhanced from the feature space perspective by introducing adversarial training and multiple random sampling. The pseudo-labeled data is generated through the test data above a certain threshold. Second, pseudo-labeled samples are added to the training set for semantic enhancement based on the sample space dimension. We utilize the same backbone for prediction to obtain the results. Experimental results show that our proposed method outperforms the single-stage benchmarks and achieves competitive performance with 0.859259 F1. It also performs better in the few-shot patent classification task with 65.160{\%} F1, which indicates robustness.{\textquotedblright}"
}
Markdown (Informal)
[System Report for CCL23-Eval Task 6: A Method For Telecom Network Fraud Case Classification Based on Two-stage Training Framework and Within-task Pretraining](https://aclanthology.org/2023.ccl-3.23/) (Zheng et al., CCL 2023)
ACL