@inproceedings{ryan-hulden-2020-data,
title = "Data Augmentation for Transformer-based {G}2{P}",
author = "Ryan, Zach and
Hulden, Mans",
editor = "Nicolai, Garrett and
Gorman, Kyle and
Cotterell, Ryan",
booktitle = "Proceedings of the 17th SIGMORPHON Workshop on Computational Research in Phonetics, Phonology, and Morphology",
month = jul,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/jlcl-multiple-ingestion/2020.sigmorphon-1.21/",
doi = "10.18653/v1/2020.sigmorphon-1.21",
pages = "184--188",
abstract = "The Transformer model has been shown to outperform other neural seq2seq models in several character-level tasks. It is unclear, however, if the Transformer would benefit as much as other seq2seq models from data augmentation strategies in the low-resource setting. In this paper we explore strategies for data augmentation in the g2p task together with the Transformer model. Our results show that a relatively simple alignment-based strategy of identifying consistent input-output subsequences in grapheme-phoneme data coupled together with a subsequent splicing together of such pieces to generate hallucinated data works well in the low-resource setting, often delivering substantial performance improvement over a standard Transformer model."
}
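
The abstract sketches the augmentation recipe: find grapheme-phoneme chunk pairs that recur consistently across aligned training examples, then splice sampled chunks together into new "hallucinated" words and pronunciations for training. Below is a minimal Python sketch of that idea under stated assumptions: the hand-made alignments, the `MIN_COUNT` consistency threshold, and the unconstrained splicing order are illustrative choices, not details taken from the paper (which derives alignments from real data with an unsupervised aligner).

```python
import random
from collections import Counter

# Toy aligned g2p data: each example is a monotonic alignment, i.e. a list of
# (grapheme_chunk, phoneme_chunk) pairs. These chunks are hand-made
# illustrations; in practice they would come from an unsupervised aligner.
ALIGNED_DATA = [
    [("sh", "ʃ"), ("i", "ɪ"), ("p", "p")],    # "ship"
    [("sh", "ʃ"), ("o", "ɒ"), ("p", "p")],    # "shop"
    [("ch", "tʃ"), ("i", "ɪ"), ("p", "p")],   # "chip"
    [("ch", "tʃ"), ("o", "ɒ"), ("p", "p")],   # "chop"
]

MIN_COUNT = 2  # assumed threshold: a pair must recur this often to count as "consistent"


def consistent_pieces(aligned_data, min_count=MIN_COUNT):
    """Collect grapheme->phoneme chunk pairs that recur across examples."""
    counts = Counter(pair for example in aligned_data for pair in example)
    return [pair for pair, c in counts.items() if c >= min_count]


def hallucinate(pieces, n_examples=5, min_len=2, max_len=4, seed=0):
    """Splice randomly sampled consistent pieces into new (grapheme, phoneme) pairs."""
    rng = random.Random(seed)
    out = []
    for _ in range(n_examples):
        chunks = rng.choices(pieces, k=rng.randint(min_len, max_len))
        graphemes = "".join(g for g, _ in chunks)
        phonemes = " ".join(p for _, p in chunks)
        out.append((graphemes, phonemes))
    return out


if __name__ == "__main__":
    pieces = consistent_pieces(ALIGNED_DATA)
    for g, p in hallucinate(pieces):
        print(f"{g}\t{p}")  # hallucinated pairs to append to the training set
```

In this low-resource setup, the hallucinated pairs would simply be appended to the original training data before training the Transformer.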