@inproceedings{sravani-mamidi-2023-enhancing,
  title     = {Enhancing Code-mixed Text Generation Using Synthetic Data Filtering in Neural Machine Translation},
  author    = {Sravani, Dama and
               Mamidi, Radhika},
  editor    = {Jiang, Jing and
               Reitter, David and
               Deng, Shumin},
  booktitle = {Proceedings of the 27th Conference on Computational Natural Language Learning ({CoNLL})},
  month     = dec,
  year      = {2023},
  address   = {Singapore},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.conll-1.15/},
  doi       = {10.18653/v1/2023.conll-1.15},
  pages     = {211--220},
  abstract  = {Code-Mixing, the act of mixing two or more languages, is a common communicative phenomenon in multi-lingual societies. The lack of quality in code-mixed data is a bottleneck for NLP systems. On the other hand, Monolingual systems perform well due to ample high-quality data. To bridge the gap, creating coherent translations of monolingual sentences to their code-mixed counterparts can improve accuracy in code-mixed settings for NLP downstream tasks. In this paper, we propose a neural machine translation approach to generate high-quality code-mixed sentences by leveraging human judgements. We train filters based on human judgements to identify natural code-mixed sentences from a larger synthetically generated code-mixed corpus, resulting in a three-way silver parallel corpus between monolingual English, monolingual Indian language and code-mixed English with an Indian language. Using these corpora, we fine-tune multi-lingual encoder-decoder models viz, mT5 and mBART, for the translation task. Our results indicate that our approach of using filtered data for training outperforms the current systems for code-mixed generation in Hindi-English. Apart from Hindi-English, the approach performs well when applied to Telugu, a low-resource language, to generate Telugu-English code-mixed sentences.},
}
Markdown (Informal)
[Enhancing Code-mixed Text Generation Using Synthetic Data Filtering in Neural Machine Translation](https://aclanthology.org/2023.conll-1.15/) (Sravani & Mamidi, CoNLL 2023)
ACL