@comment{CombAlign paper in the NoDaLiDa 2021 proceedings. The month field joins the predefined may and jun macros around the literal day range. The url field holds the canonical ACL Anthology address rather than a preview-branch link.}
@inproceedings{steingrimsson-etal-2021-combalign,
  title     = {{CombAlign}: a Tool for Obtaining High-Quality Word Alignments},
  author    = {Steingr{\'i}msson, Stein{\th}{\'o}r and
               Loftsson, Hrafn and
               Way, Andy},
  editor    = {Dobnik, Simon and
               {\O}vrelid, Lilja},
  booktitle = {Proceedings of the 23rd Nordic Conference on Computational Linguistics ({NoDaLiDa})},
  month     = may # " 31--2 " # jun,
  year      = {2021},
  address   = {Reykjavik, Iceland (Online)},
  publisher = {Link{\"o}ping University Electronic Press, Sweden},
  url       = {https://aclanthology.org/2021.nodalida-main.7/},
  pages     = {64--73},
  abstract  = {Being able to generate accurate word alignments is useful for a variety of tasks. While statistical word aligners can work well, especially when parallel training data are plentiful, multilingual embedding models have recently been shown to give good results in unsupervised scenarios. We evaluate an ensemble method for word alignment on four language pairs and demonstrate that by combining multiple tools, taking advantage of their different approaches, substantial gains can be made. This holds for settings ranging from very low-resource to high-resource. Furthermore, we introduce a new gold alignment test set for Icelandic and a new easy-to-use tool for creating manual word alignments.},
}
Markdown (Informal)
[CombAlign: a Tool for Obtaining High-Quality Word Alignments](https://aclanthology.org/2021.nodalida-main.7/) (Steingrímsson et al., NoDaLiDa 2021)
ACL