@comment{Findings of ACL 2023 paper (PMI-Align). The url field uses the permanent ACL Anthology landing page for ID 2023.findings-acl.782, which matches the DOI suffix, rather than a temporary preview-build link.}
@inproceedings{azadi-etal-2023-pmi,
    title     = {{PMI}-Align: Word Alignment With Point-Wise Mutual Information Without Requiring Parallel Training Data},
    author    = {Azadi, Fatemeh and Faili, Heshaam and Dousti, Mohammad Javad},
    editor    = {Rogers, Anna and Boyd-Graber, Jordan and Okazaki, Naoaki},
    booktitle = {Findings of the Association for Computational Linguistics: ACL 2023},
    month     = jul,
    year      = {2023},
    address   = {Toronto, Canada},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2023.findings-acl.782/},
    doi       = {10.18653/v1/2023.findings-acl.782},
    pages     = {12366--12377},
    abstract  = {Word alignment has many applications including cross-lingual annotation projection, bilingual lexicon extraction, and the evaluation or analysis of translation outputs. Recent studies show that using contextualized embeddings from pre-trained multilingual language models could give us high quality word alignments without the need of parallel training data. In this work, we propose PMI-Align which computes and uses the point-wise mutual information between source and target tokens to extract word alignments, instead of the cosine similarity or dot product which is mostly used in recent approaches. Our experiments show that our proposed PMI-Align approach could outperform the rival methods on five out of six language pairs. Although our approach requires no parallel training data, we show that this method could also benefit the approaches using parallel data to fine-tune pre-trained language models on word alignments. Our code and data are publicly available.},
}
Markdown (Informal)
[PMI-Align: Word Alignment With Point-Wise Mutual Information Without Requiring Parallel Training Data](https://aclanthology.org/2023.findings-acl.782/) (Azadi et al., Findings 2023)
ACL