@inproceedings{vahtola-etal-2021-coping,
  title     = {Coping with Noisy Training Data Labels in Paraphrase Detection},
  author    = {Vahtola, Teemu and
               Creutz, Mathias and
               Sj{\"o}blom, Eetu and
               Itkonen, Sami},
  editor    = {Xu, Wei and
               Ritter, Alan and
               Baldwin, Tim and
               Rahimi, Afshin},
  booktitle = {Proceedings of the Seventh Workshop on Noisy User-generated Text (W-NUT 2021)},
  month     = nov,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.wnut-1.32/},
  doi       = {10.18653/v1/2021.wnut-1.32},
  pages     = {291--296},
  abstract  = {We present new state-of-the-art benchmarks for paraphrase detection on all six languages in the Opusparcus sentential paraphrase corpus: English, Finnish, French, German, Russian, and Swedish. We reach these baselines by fine-tuning BERT. The best results are achieved on smaller and cleaner subsets of the training sets than was observed in previous research. Additionally, we study a translation-based approach that is competitive for the languages with more limited and noisier training data.},
}
Markdown (Informal)
[Coping with Noisy Training Data Labels in Paraphrase Detection](https://aclanthology.org/2021.wnut-1.32/) (Vahtola et al., WNUT 2021)
ACL