% Marie and Fujita, short paper in the ACL 2017 proceedings (Volume 2: Short Papers).
% The url field uses the canonical ACL Anthology address for paper P17-2062
% (the same identifier as the DOI suffix), not a temporary preview deployment.
@inproceedings{marie-fujita-2017-efficient,
    author    = {Marie, Benjamin and Fujita, Atsushi},
    title     = {Efficient Extraction of Pseudo-Parallel Sentences from Raw Monolingual Data Using Word Embeddings},
    editor    = {Barzilay, Regina and Kan, Min-Yen},
    booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
    month     = jul,
    year      = {2017},
    address   = {Vancouver, Canada},
    publisher = {Association for Computational Linguistics},
    pages     = {392--398},
    doi       = {10.18653/v1/P17-2062},
    url       = {https://aclanthology.org/P17-2062/},
    abstract  = {We propose a new method for extracting pseudo-parallel sentences from a pair of large monolingual corpora, without relying on any document-level information. Our method first exploits word embeddings in order to efficiently evaluate trillions of candidate sentence pairs and then a classifier to find the most reliable ones. We report significant improvements in domain adaptation for statistical machine translation when using a translation model trained on the sentence pairs extracted from in-domain monolingual corpora.},
}
Markdown (Informal)
[Efficient Extraction of Pseudo-Parallel Sentences from Raw Monolingual Data Using Word Embeddings](https://preview.aclanthology.org/add-emnlp-2024-awards/P17-2062/) (Marie & Fujita, ACL 2017)
ACL