@inproceedings{kew-sennrich-2023-uncovering,
title = "Uncovering Hidden Consequences of Pre-training Objectives in Sequence-to-Sequence Models",
author = "Kew, Tannon and
Sennrich, Rico",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2023",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2023.findings-acl.438/",
doi = "10.18653/v1/2023.findings-acl.438",
pages = "7010--7022",
abstract = "Some variants of self-supervised denoising objectives for pre-training encoder-decoder language models have been reported to have a negligible impact on downstream performance. Yet the design of these pre-training objectives leads to behavioural differences that can be uncovered with specific manipulations. We reproduce a recently proposed zero-shot control method and find that it is only successful on a subset of models. To understand what causes the difference in its effectiveness, we perform a set of controlled experiments, varying only the pre-training objective, and find unexpected interactions between the pre-training method and downstream controllability of models after fine-tuning. Our results show that different pre-training objectives have consequences that may not be visible in standard downstream evaluation, but which should be taken into account when developing models with controllability in mind."
}