@inproceedings{oluwatobi-mueller-2020-dlgnet,
title = "{DLGN}et: A Transformer-based Model for Dialogue Response Generation",
author = "Oluwatobi, Olabiyi and
Mueller, Erik",
editor = "Wen, Tsung-Hsien and
Celikyilmaz, Asli and
Yu, Zhou and
Papangelis, Alexandros and
Eric, Mihail and
Kumar, Anuj and
Casanueva, I{\~n}igo and
Shah, Rushin",
booktitle = "Proceedings of the 2nd Workshop on Natural Language Processing for Conversational AI",
month = jul,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2020.nlp4convai-1.7/",
doi = "10.18653/v1/2020.nlp4convai-1.7",
pages = "54--62",
abstract = "Neural dialogue models, despite their successes, still suffer from lack of relevance, diversity, and in many cases coherence in their generated responses. On the other hand, transformer-based models such as GPT-2 have demonstrated an excellent ability to capture long-range structures in language modeling tasks. In this paper, we present DLGNet, a transformer-based model for dialogue modeling. We specifically examine the use of DLGNet for multi-turn dialogue response generation. In our experiments, we evaluate DLGNet on the open-domain Movie Triples dataset and the closed-domain Ubuntu Dialogue dataset. DLGNet models, although trained with only the maximum likelihood objective, achieve significant improvements over state-of-the-art multi-turn dialogue models. They also produce best performance to date on the two datasets based on several metrics, including BLEU, ROUGE, and distinct n-gram. Our analysis shows that the performance improvement is mostly due to the combination of (1) the long-range transformer architecture with (2) the injection of random informative paddings. Other contributing factors include the joint modeling of dialogue context and response, and the 100{\%} tokenization coverage from the byte pair encoding (BPE)."
}
Markdown (Informal)
[DLGNet: A Transformer-based Model for Dialogue Response Generation](https://aclanthology.org/2020.nlp4convai-1.7/) (Oluwatobi & Mueller, NLP4ConvAI 2020)
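To make the approach described in the abstract concrete, below is a minimal sketch (not the authors' released code) of how a DLGNet-style training example might be assembled: the dialogue context and response are jointly encoded into a single sequence, and the remaining transformer window is filled with random "informative" padding drawn from the corpus. The function name `build_example`, the window size, the turn separator, and the exact placement of the padding are all assumptions made for illustration.

```python
# Hedged sketch (not the authors' code): one plausible way to build a
# DLGNet-style training example, assuming a GPT-2 BPE tokenizer from the
# `transformers` library and a fixed window of 1024 tokens. The exact
# placement of the random informative padding in the paper may differ.
import random
from transformers import GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
WINDOW = 1024  # GPT-2 context size assumed by this sketch
EOS = tokenizer.eos_token  # used here as a turn separator (assumption)

def build_example(context_turns, response, corpus_utterances):
    """Jointly encode dialogue context and response, then fill the
    remaining window with random utterances drawn from the corpus
    (the 'informative padding' idea, as described in the abstract)."""
    # Joint modeling: context turns and response share one sequence.
    dialogue = EOS.join(context_turns + [response]) + EOS
    ids = tokenizer.encode(dialogue)

    # Inject random informative padding until the window is full.
    padding = []
    while len(ids) + len(padding) < WINDOW:
        padding += tokenizer.encode(random.choice(corpus_utterances) + EOS)
    return (ids + padding)[:WINDOW]

# Example usage with toy Ubuntu-style utterances:
example = build_example(
    ["how do I mount a usb drive ?", "which filesystem is it ?"],
    "it is ntfs , use ntfs-3g .",
    ["try sudo fdisk -l", "check dmesg for the device name"],
)
print(len(example))  # 1024
```

Training would then proceed with the plain maximum likelihood objective over the full sequence, consistent with the abstract's claim that no auxiliary losses are required.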