@comment{TM-TREK system paper for SemEval-2024 Task 8. The abstract has been
de-hyphenated (line-break hyphens left over from PDF extraction removed) and
the url field points at the canonical ACL Anthology record matching the DOI.}
@inproceedings{qu-meng-2024-tm,
  title     = {{TM}-{TREK} at {SemEval}-2024 Task 8: Towards {LLM}-Based Automatic Boundary Detection for Human-Machine Mixed Text},
  author    = {Qu, Xiaoyan and
               Meng, Xiangfeng},
  editor    = {Ojha, Atul Kr. and
               Do{\u{g}}ru{\"o}z, A. Seza and
               Tayyar Madabushi, Harish and
               Da San Martino, Giovanni and
               Rosenthal, Sara and
               Ros{\'a}, Aiala},
  booktitle = {Proceedings of the 18th International Workshop on Semantic Evaluation ({SemEval}-2024)},
  month     = jun,
  year      = {2024},
  address   = {Mexico City, Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.semeval-1.102/},
  doi       = {10.18653/v1/2024.semeval-1.102},
  pages     = {710--715},
  abstract  = {With the increasing prevalence of text generated by large language models (LLMs), there is a growing concern about distinguishing between LLM-generated and human-written texts in order to prevent the misuse of LLMs, such as the dissemination of misleading information and academic dishonesty. Previous research has primarily focused on classifying text as either entirely human-written or LLM-generated, neglecting the detection of mixed texts that contain both types of content. This paper explores LLMs' ability to identify boundaries in human-written and machine-generated mixed texts. We approach this task by transforming it into a token classification problem and regard the label turning point as the boundary. Notably, our ensemble model of LLMs achieved first place in the `Human-Machine Mixed Text Detection' sub-task of the SemEval'24 Competition Task 8. Additionally, we investigate factors that influence the capability of LLMs in detecting boundaries within mixed texts, including the incorporation of extra layers on top of LLMs, combination of segmentation loss, and the impact of pretraining. Our findings aim to provide valuable insights for future research in this area.},
}
Markdown (Informal)
[TM-TREK at SemEval-2024 Task 8: Towards LLM-Based Automatic Boundary Detection for Human-Machine Mixed Text](https://aclanthology.org/2024.semeval-1.102/) (Qu & Meng, SemEval 2024)
ACL