@inproceedings{liu-etal-2024-evaluating,
title = "Evaluating {C}hinese Large Language Models on Discipline Knowledge Acquisition via Memorization and Robustness Assessment",
author = "Liu, Chuang and
Jin, Renren and
Steedman, Mark and
Xiong, Deyi",
editor = "Sainz, Oscar and
Garc{\'i}a Ferrero, Iker and
Agirre, Eneko and
Ander Campos, Jon and
Jacovi, Alon and
Elazar, Yanai and
Goldberg, Yoav",
booktitle = "Proceedings of the 1st Workshop on Data Contamination (CONDA)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2024.conda-1.1/",
doi = "10.18653/v1/2024.conda-1.1",
pages = "1--12",
    abstract = "Chinese LLMs demonstrate impressive performance on NLP tasks, particularly on discipline knowledge benchmarks, with some results approaching those of GPT-4. Previous research has viewed these advancements as potential outcomes of data contamination or leakage, prompting efforts to create new detection methods and address evaluation issues in LLM benchmarks. However, there has been a lack of comprehensive assessment of the evolution of Chinese LLMs. To address this gap, this paper offers a thorough investigation of Chinese LLMs on discipline knowledge evaluation, delving into the advancements of various LLMs, including a group of related models and others. Specifically, we have conducted six assessments ranging from knowledge memorization to comprehension for robustness, encompassing tasks such as predicting incomplete questions and options, identifying contamination-induced behaviors through fine-tuning, and answering rephrased questions. Experimental findings indicate a positive correlation between the release time of LLMs and their memorization capabilities, but the models struggle with variations of the original question-option pairs. Additionally, our findings suggest that question descriptions have a more significant impact on LLMs' performance."
}
Markdown (Informal)
[Evaluating Chinese Large Language Models on Discipline Knowledge Acquisition via Memorization and Robustness Assessment](https://preview.aclanthology.org/jlcl-multiple-ingestion/2024.conda-1.1/) (Liu et al., CONDA 2024)
ACL
Chuang Liu, Renren Jin, Mark Steedman, and Deyi Xiong. 2024. [Evaluating Chinese Large Language Models on Discipline Knowledge Acquisition via Memorization and Robustness Assessment](https://preview.aclanthology.org/jlcl-multiple-ingestion/2024.conda-1.1/). In *Proceedings of the 1st Workshop on Data Contamination (CONDA)*, pages 1–12, Bangkok, Thailand. Association for Computational Linguistics.
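
Below is a minimal, illustrative sketch (not the authors' code) of the kind of completion-based memorization probe the abstract describes: a benchmark question is shown with some of its answer options removed, the model is asked to reproduce the missing options, and its output is compared against the ground truth. The `query_llm` callable, `memorization_score` helper, and the toy item are hypothetical placeholders for whatever model API and benchmark data are actually evaluated.

```python
# Sketch of a completion-based memorization probe (assumed setup, not the paper's code):
# show a model a benchmark question with some options hidden, ask it to complete them,
# and measure how closely its completion matches the original options.

from difflib import SequenceMatcher
from typing import Callable, List


def memorization_score(
    question: str,
    options: List[str],
    query_llm: Callable[[str], str],  # placeholder for the model under test
    keep: int = 2,
) -> float:
    """Ask the model to reproduce the hidden options; return string similarity in [0, 1]."""
    shown, hidden = options[:keep], options[keep:]
    prompt = (
        "The following multiple-choice question is missing some options. "
        "Write the missing options exactly as they appear in the original exam.\n\n"
        f"Question: {question}\n"
        + "".join(f"{chr(65 + i)}. {opt}\n" for i, opt in enumerate(shown))
    )
    completion = query_llm(prompt)
    reference = "\n".join(hidden)
    # High similarity to the ground-truth options suggests the item may have been
    # seen during pre-training or fine-tuning (i.e., possible contamination).
    return SequenceMatcher(None, completion.strip(), reference).ratio()


if __name__ == "__main__":
    question = "Which dynasty directly preceded the Tang dynasty?"
    options = ["Han", "Sui", "Song", "Ming"]

    def fake_llm(prompt: str) -> str:
        # Dummy "model" that happens to know the hidden options verbatim.
        return "Song\nMing"

    print(memorization_score(question, options, fake_llm))  # -> 1.0
```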