% CCL 2024 main-conference paper (ACL Anthology ID 2024.ccl-1.14).
% The citation key is kept as exported so existing citations keep resolving;
% it was derived from the first author's given name, not the family name.
% Author names are stored as "Family, Given" (Zhang / Li / Feng are the family names).
@inproceedings{wenjuan-etal-2024-ji,
  title     = {基于隐性句逗号识别的汉语长句机器翻译(Machine translation of {Chinese} long sentences based on recognition of implicit period and comma)},
  author    = {Zhang, Wenjuan and
               Li, Manjia and
               Feng, Wenhe},
  editor    = {Sun, Maosong and
               Liang, Jiye and
               Han, Xianpei and
               Liu, Zhiyuan and
               He, Yulan},
  booktitle = {Proceedings of the 23rd Chinese National Conference on Computational Linguistics (Volume 1: Main Conference)},
  month     = jul,
  year      = {2024},
  address   = {Taiyuan, China},
  publisher = {Chinese Information Processing Society of China},
  url       = {https://aclanthology.org/2024.ccl-1.14/},
  pages     = {197--205},
  language  = {zho},
  abstract  = {长句翻译一直是机器翻译的难题。本文根据汉语中相当数量的逗号(句内标点)和句号(句间标点)可相互转化的特点,提出{\textquotedblleft}隐性句号{\textquotedblright}(可转化为句号的逗号)和{\textquotedblleft}隐性逗号{\textquotedblright}(可转化为逗号的句号)概念,并实现其自动识别,以将汉语长句变为短句用于汉英机器翻译。为此,首先通过人工与半监督学习结合方法构建了一个隐性句逗数据集,实现了基于预训练模型的隐性句逗识别方法,其中性能最好的HierarchicalBERT作为后续应用模型。进而,实现了基于隐性句逗识别的汉英机器翻译方法。在WMT2018(新闻)和WMT2023(文学)测试语料上基于预训练机器翻译模型的实验表明,对于汉语长句的英译,本文方法相比基准翻译的BLEU值整体有所提高,而且在相对稳健机器翻译模型上,呈现为句子越长本文方法效果越明显。},
}
Markdown (Informal)
[基于隐性句逗号识别的汉语长句机器翻译(Machine translation of Chinese long sentences based on recognition of implicit period and comma)](https://aclanthology.org/2024.ccl-1.14/) (Zhang et al., CCL 2024)
ACL