% Workshop paper from the DravidianLangTech 2021 proceedings (pages 85--93).
% The url field uses the permanent ACL Anthology address for this paper id
% rather than a staging/preview build, which is not a stable citation target.
% The proper noun in the title is brace-protected as a whole word so that
% sentence-casing styles keep its capital letter.
@inproceedings{kanakagiri-radhakrishnan-2021-task,
  title     = {Task-Oriented Dialog Systems for {Dravidian} Languages},
  author    = {Kanakagiri, Tushar and
               Radhakrishnan, Karthik},
  editor    = {Chakravarthi, Bharathi Raja and
               Priyadharshini, Ruba and
               Kumar M, Anand and
               Krishnamurthy, Parameswari and
               Sherly, Elizabeth},
  booktitle = {Proceedings of the First Workshop on Speech and Language Technologies for Dravidian Languages},
  month     = apr,
  year      = {2021},
  address   = {Kyiv},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.dravidianlangtech-1.11/},
  pages     = {85--93},
  abstract  = {Task-oriented dialog systems help a user achieve a particular goal by parsing user requests to execute a particular action. These systems typically require copious amounts of training data to effectively understand the user intent and its corresponding slots. Acquiring large training corpora requires significant manual effort in annotation, rendering its construction infeasible for low-resource languages. In this paper, we present a two-step approach for automatically constructing task-oriented dialogue data in such languages by making use of annotated data from high resource languages. First, we use a machine translation (MT) system to translate the utterance and slot information to the target language. Second, we use token prefix matching and mBERT based semantic matching to align the slot tokens to the corresponding tokens in the utterance. We hand-curate a new test dataset in two low-resource Dravidian languages and show the significance and impact of our training dataset construction using a state-of-the-art mBERT model - achieving a Slot F1 of 81.51 (Kannada) and 78.82 (Tamil) on our test sets.},
}
Markdown (Informal)
[Task-Oriented Dialog Systems for Dravidian Languages](https://aclanthology.org/2021.dravidianlangtech-1.11/) (Kanakagiri & Radhakrishnan, DravidianLangTech 2021)
ACL
- Tushar Kanakagiri and Karthik Radhakrishnan. 2021. Task-Oriented Dialog Systems for Dravidian Languages. In Proceedings of the First Workshop on Speech and Language Technologies for Dravidian Languages, pages 85–93, Kyiv. Association for Computational Linguistics.