@comment{SemEval-2022 Task 8 system description paper (HuaAMS submission).
  The url field holds the canonical ACL Anthology address; the doi is the
  preferred persistent identifier.
  NOTE(review): address looks like the conference venue rather than the
  publisher's city -- confirm before relying on it as a publisher location.}
@inproceedings{chittilla-khalil-2022-huaams,
  title     = {{HuaAMS} at {SemEval-2022} Task 8: Combining Translation and Domain Pre-training for Cross-lingual News Article Similarity},
  author    = {Chittilla, Sai Sandeep Sharma and
               Khalil, Talaat},
  editor    = {Emerson, Guy and
               Schluter, Natalie and
               Stanovsky, Gabriel and
               Kumar, Ritesh and
               Palmer, Alexis and
               Schneider, Nathan and
               Singh, Siddharth and
               Ratan, Shyam},
  booktitle = {Proceedings of the 16th International Workshop on Semantic Evaluation (SemEval-2022)},
  month     = jul,
  year      = {2022},
  address   = {Seattle, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.semeval-1.162/},
  doi       = {10.18653/v1/2022.semeval-1.162},
  pages     = {1151--1156},
  abstract  = {This paper describes our submission to SemEval-2022 Multilingual News Article Similarity task. We experiment with different approaches that utilize a pre-trained language model fitted with a regression head to predict similarity scores for a given pair of news articles. Our best performing systems include 2 key steps: 1) pre-training with in-domain data 2) training data enrichment through machine translation. Our final submission is an ensemble of predictions from our top systems. While we show the significance of pre-training and augmentation, we believe the issue of language coverage calls for more attention.},
}
Markdown (Informal)
[HuaAMS at SemEval-2022 Task 8: Combining Translation and Domain Pre-training for Cross-lingual News Article Similarity](https://preview.aclanthology.org/fix-sig-urls/2022.semeval-1.162/) (Chittilla & Khalil, SemEval 2022)
ACL