% Conference paper in the EAMT 2022 proceedings (ACL Anthology ID 2022.eamt-1.19).
% The url field uses the canonical aclanthology.org address rather than a
% temporary preview-branch mirror, so the link stays valid over time.
@inproceedings{van-der-werff-etal-2022-automatic,
  author    = {van der Werff, Tobias and
               van Noord, Rik and
               Toral, Antonio},
  title     = {Automatic Discrimination of Human and Neural Machine Translation: A Study with Multiple Pre-Trained Models and Longer Context},
  editor    = {Moniz, Helena and
               Macken, Lieve and
               Rufener, Andrew and
               Barrault, Lo{\"i}c and
               Costa-juss{\`a}, Marta R. and
               Declercq, Christophe and
               Koponen, Maarit and
               Kemp, Ellie and
               Pilos, Spyridon and
               Forcada, Mikel L. and
               Scarton, Carolina and
               Van den Bogaert, Joachim and
               Daems, Joke and
               Tezcan, Arda and
               Vanroy, Bram and
               Fonteyne, Margot},
  booktitle = {Proceedings of the 23rd Annual Conference of the European Association for Machine Translation},
  month     = jun,
  year      = {2022},
  address   = {Ghent, Belgium},
  publisher = {European Association for Machine Translation},
  url       = {https://aclanthology.org/2022.eamt-1.19/},
  pages     = {161--170},
  abstract  = {We address the task of automatically distinguishing between human-translated (HT) and machine translated (MT) texts. Following recent work, we fine-tune pre-trained language models (LMs) to perform this task. Our work differs in that we use state-of-the-art pre-trained LMs, as well as the test sets of the WMT news shared tasks as training data, to ensure the sentences were not seen during training of the MT system itself. Moreover, we analyse performance for a number of different experimental setups, such as adding translationese data, going beyond the sentence-level and normalizing punctuation. We show that (i) choosing a state-of-the-art LM can make quite a difference: our best baseline system (DeBERTa) outperforms both BERT and RoBERTa by over 3{\%} accuracy, (ii) adding translationese data is only beneficial if there is not much data available, (iii) considerable improvements can be obtained by classifying at the document-level and (iv) normalizing punctuation and thus avoiding (some) shortcuts has no impact on model performance.},
}
Markdown (Informal)
[Automatic Discrimination of Human and Neural Machine Translation: A Study with Multiple Pre-Trained Models and Longer Context](https://aclanthology.org/2022.eamt-1.19/) (van der Werff et al., EAMT 2022)
ACL