@comment{LREC 2022 conference paper; ACL Anthology ID 2022.lrec-1.334. The url field holds the canonical Anthology address for this ID.}
@inproceedings{enayet-sukthankar-2022-analysis,
  title     = {An Analysis of Dialogue Act Sequence Similarity Across Multiple Domains},
  author    = {Enayet, Ayesha and
               Sukthankar, Gita},
  editor    = {Calzolari, Nicoletta and
               B{\'e}chet, Fr{\'e}d{\'e}ric and
               Blache, Philippe and
               Choukri, Khalid and
               Cieri, Christopher and
               Declerck, Thierry and
               Goggi, Sara and
               Isahara, Hitoshi and
               Maegaard, Bente and
               Mariani, Joseph and
               Mazo, H{\'e}l{\`e}ne and
               Odijk, Jan and
               Piperidis, Stelios},
  booktitle = {Proceedings of the Thirteenth Language Resources and Evaluation Conference},
  month     = jun,
  year      = {2022},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  url       = {https://aclanthology.org/2022.lrec-1.334/},
  pages     = {3122--3130},
  abstract  = {This paper presents an analysis of how dialogue act sequences vary across different datasets in order to anticipate the potential degradation in the performance of learned models during domain adaptation. We hypothesize the following: 1) dialogue sequences from related domains will exhibit similar n-gram frequency distributions 2) this similarity can be expressed by measuring the average Hamming distance between subsequences drawn from different datasets. Our experiments confirm that when dialogue acts sequences from two datasets are dissimilar they lie further away in embedding space, making it possible to train a classifier to discriminate between them even when the datasets are corrupted with noise. We present results from eight different datasets: SwDA, AMI (DialSum), GitHub, Hate Speech, Teams, Diplomacy Betrayal, SAMsum, and Military (Army). Our datasets were collected from many types of human communication including strategic planning, informal discussion, and social media exchanges. Our methodology provides intuition on the generalizability of dialogue models trained on different datasets. Based on our analysis, it is problematic to assume that machine learning models trained on one type of discourse will generalize well to other settings, due to contextual differences.},
}
Markdown (Informal)
[An Analysis of Dialogue Act Sequence Similarity Across Multiple Domains](https://aclanthology.org/2022.lrec-1.334/) (Enayet & Sukthankar, LREC 2022)
ACL