@inproceedings{zhou-etal-2021-zhong,
title = "中文词语离合现象识别研究(Research on Recognition of the Separation and Reunion Phenomena of Words in {C}hinese)",
author = "Zhou, Lou and
Qu, Weiguang and
Wei, Tingxin and
Zhou, Junsheng and
Li, Bin and
Gu, Yanhui",
booktitle = "Proceedings of the 20th Chinese National Conference on Computational Linguistics",
month = aug,
year = "2021",
address = "Huhhot, China",
publisher = "Chinese Information Processing Society of China",
url = "https://aclanthology.org/2021.ccl-1.10",
pages = "97--107",
abstract = "{``}汉语词语的离合现象是汉语中一种词语可分可合的特殊现象。本文采用字符级序列标注方法解决二字动词离合现象的自动识别问题,以避免中文分词及词性标注的错误传递,节省制定匹配规则与特征模板的人工开支。在训练过程中微调BERT中文预训练模型,获取面向目标任务的字符向量表示,并引入掩码机制对模型隐藏离用法中分离的词语,减轻词语本身对识别结果的影响,强化中间插入成分的学习,并对前后语素采用不同的掩码以强调其出现顺序,进而使模型具备了识别复杂及偶发性离用法的能力。为获得含有上下文信息的句子表达,将原始的句子表达与采用掩码的句子表达分别输入两个不同参数的BiLSTM层进行训练,最后采用CRF算法捕捉句子标签序列的依赖关系。本文提出的BERT MASK + 2BiLSTMs + CRF模型比现有最优的离合词识别模型提高了2.85{\%}的F1值。{''}",
language = "Chinese",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhou-etal-2021-zhong">
<titleInfo>
<title>中文词语离合现象识别研究(Research on Recognition of the Separation and Reunion Phenomena of Words in Chinese)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lou</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Weiguang</namePart>
<namePart type="family">Qu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tingxin</namePart>
<namePart type="family">Wei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junsheng</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bin</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yanhui</namePart>
<namePart type="family">Gu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-aug</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">Chinese</languageTerm>
<languageTerm type="code" authority="iso639-2b">chi</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th Chinese National Conference on Computational Linguistics</title>
</titleInfo>
<originInfo>
<publisher>Chinese Information Processing Society of China</publisher>
<place>
<placeTerm type="text">Huhhot, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>“汉语词语的离合现象是汉语中一种词语可分可合的特殊现象。本文采用字符级序列标注方法解决二字动词离合现象的自动识别问题,以避免中文分词及词性标注的错误传递,节省制定匹配规则与特征模板的人工开支。在训练过程中微调BERT中文预训练模型,获取面向目标任务的字符向量表示,并引入掩码机制对模型隐藏离用法中分离的词语,减轻词语本身对识别结果的影响,强化中间插入成分的学习,并对前后语素采用不同的掩码以强调其出现顺序,进而使模型具备了识别复杂及偶发性离用法的能力。为获得含有上下文信息的句子表达,将原始的句子表达与采用掩码的句子表达分别输入两个不同参数的BiLSTM层进行训练,最后采用CRF算法捕捉句子标签序列的依赖关系。本文提出的BERT MASK + 2BiLSTMs + CRF模型比现有最优的离合词识别模型提高了2.85%的F1值。”</abstract>
<identifier type="citekey">zhou-etal-2021-zhong</identifier>
<location>
<url>https://aclanthology.org/2021.ccl-1.10</url>
</location>
<part>
<date>2021-aug</date>
<extent unit="page">
<start>97</start>
<end>107</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T 中文词语离合现象识别研究(Research on Recognition of the Separation and Reunion Phenomena of Words in Chinese)
%A Zhou, Lou
%A Qu, Weiguang
%A Wei, Tingxin
%A Zhou, Junsheng
%A Li, Bin
%A Gu, Yanhui
%S Proceedings of the 20th Chinese National Conference on Computational Linguistics
%D 2021
%8 aug
%I Chinese Information Processing Society of China
%C Huhhot, China
%G Chinese
%F zhou-etal-2021-zhong
%X “汉语词语的离合现象是汉语中一种词语可分可合的特殊现象。本文采用字符级序列标注方法解决二字动词离合现象的自动识别问题,以避免中文分词及词性标注的错误传递,节省制定匹配规则与特征模板的人工开支。在训练过程中微调BERT中文预训练模型,获取面向目标任务的字符向量表示,并引入掩码机制对模型隐藏离用法中分离的词语,减轻词语本身对识别结果的影响,强化中间插入成分的学习,并对前后语素采用不同的掩码以强调其出现顺序,进而使模型具备了识别复杂及偶发性离用法的能力。为获得含有上下文信息的句子表达,将原始的句子表达与采用掩码的句子表达分别输入两个不同参数的BiLSTM层进行训练,最后采用CRF算法捕捉句子标签序列的依赖关系。本文提出的BERT MASK + 2BiLSTMs + CRF模型比现有最优的离合词识别模型提高了2.85%的F1值。”
%U https://aclanthology.org/2021.ccl-1.10
%P 97-107
Markdown (Informal)
[中文词语离合现象识别研究(Research on Recognition of the Separation and Reunion Phenomena of Words in Chinese)](https://aclanthology.org/2021.ccl-1.10) (Zhou et al., CCL 2021)
ACL