@comment{AlignFix system-demonstration paper, EACL 2026 (Volume 3: System Demonstrations).
NOTE(review): the url field points at a preview/ingest host; confirm the permanent URL once the volume is published.}
@inproceedings{frontull-haller-seeber-2026-alignfix,
    title = "{AlignFix}: A Tool for Parallel Corpora Augmentation and Refinement",
    author = "Frontull, Samuel and
      Haller-Seeber, Simon",
    editor = "Croce, Danilo and
      Leidner, Jochen and
      Moosavi, Nafise Sadat",
    booktitle = "Proceedings of the 19th Conference of the {European} Chapter of the {Association} for {Computational} {Linguistics} (Volume 3: System Demonstrations)",
    month = mar,
    year = "2026",
    address = "Rabat, Morocco",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-eacl/2026.eacl-demo.17/",
    pages = "215--224",
    isbn = "979-8-89176-382-1",
    abstract = "High-quality datasets are crucial for training effective state of the art machine translation systems. However, due to the data-intensive nature of these systems, they have to be trained on large amounts of text that can easily go beyond the scope of full human inspection. This makes the presence of noise that can degrade overall system performance a frequent and significant issue. While various approaches have been developed to identify and select only the highest-quality training examples, this is undesirable in scenarios where resources are limited. For this reason, we introduce AlignFix, an open-source tool for augmenting data, identifying and correcting errors in parallel corpora. Leveraging word alignments, AlignFix extracts consistent phrase pairs, enabling targeted replacements that can improve the dataset quality. Besides targeted replacements, the tool enables contextual augmentation by duplicating sentences and allowing users to substitute words with alternatives of their choice. The tool maintains and updates the underlying word alignments, thereby avoiding the costly recomputation. AlignFix runs locally in the browser, requires no installation, and ensures that all data remains entirely on the client side. It is released under Apache 2.0 license, encouraging broad adoption, reuse, and further development. A live demo is available at https://ifi-alignfix.uibk.ac.at."
}
Markdown (Informal)
[AlignFix: A Tool for Parallel Corpora Augmentation and Refinement](https://preview.aclanthology.org/ingest-eacl/2026.eacl-demo.17/) (Frontull & Haller-Seeber, EACL 2026)
ACL
- Samuel Frontull and Simon Haller-Seeber. 2026. AlignFix: A Tool for Parallel Corpora Augmentation and Refinement. In Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 3: System Demonstrations), pages 215–224, Rabat, Morocco. Association for Computational Linguistics.