@comment{
  Workshop paper from the LoResMT 2026 proceedings (Imamura and Utiyama).
  NOTE(review): the url field points at a preview.aclanthology.org path,
  presumably a staging build of the anthology; confirm the canonical URL
  before relying on it in a published bibliography.
}
@inproceedings{imamura-utiyama-2026-text,
  title     = {Text Filter Based on Automatically Acquired Vocabularies for Multilingual Machine Translation},
  author    = {Imamura, Kenji and
               Utiyama, Masao},
  editor    = {Ojha, Atul Kr. and
               Liu, Chao-hong and
               Vylomova, Ekaterina and
               Pirinen, Flammie and
               Washington, Jonathan and
               Oco, Nathaniel and
               Zhao, Xiaobing},
  booktitle = {Proceedings for the Ninth Workshop on Technologies for Machine Translation of Low Resource Languages ({LoResMT} 2026)},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/manual-author-scripts/2026.loresmt-1.3/},
  pages     = {37--48},
  isbn      = {979-8-89176-366-1},
  abstract  = {In this paper, we propose a text filter designed to support multiple languages. The method simply aggregates vocabulary from a monolingual corpus and compares it against the input. Despite its simplicity, the approach proves highly effective in removing code-mixed text. When combined with existing language identification techniques, our method can enhance the purity of the corpus in the target language. Consequently, applying it to parallel corpora for machine translation has the potential to improve translation quality. Additionally, the proposed method supports the incremental addition of new languages without the need to retrain those already learned. This feature easily enables our method to be applied to low-resource languages.},
}

Markdown (Informal)
[Text Filter Based on Automatically Acquired Vocabularies for Multilingual Machine Translation](https://preview.aclanthology.org/manual-author-scripts/2026.loresmt-1.3/) (Imamura & Utiyama, LoResMT 2026)
ACL