BibTeX
@inproceedings{tan-etal-2020-tnt,
title = "{TNT}: Text Normalization based Pre-training of Transformers for Content Moderation",
author = "Tan, Fei and
Hu, Yifan and
Hu, Changwei and
Li, Keqian and
Yen, Kevin",
booktitle = "Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.emnlp-main.383",
doi = "10.18653/v1/2020.emnlp-main.383",
pages = "4735--4741",
abstract = "In this work, we present a new language pre-training model TNT (Text Normalization based pre-training of Transformers) for content moderation. Inspired by the masking strategy and text normalization, TNT is developed to learn language representation by training transformers to reconstruct text from four operation types typically seen in text manipulation: substitution, transposition, deletion, and insertion. Furthermore, the normalization involves the prediction of both operation types and token labels, enabling TNT to learn from more challenging tasks than the standard task of masked word recovery. As a result, the experiments demonstrate that TNT outperforms strong baselines on the hate speech classification task. Additional text normalization experiments and case studies show that TNT is a new potential approach to misspelling correction.",
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tan-etal-2020-tnt">
<titleInfo>
<title>TNT: Text Normalization based Pre-training of Transformers for Content Moderation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Fei</namePart>
<namePart type="family">Tan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yifan</namePart>
<namePart type="family">Hu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Changwei</namePart>
<namePart type="family">Hu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Keqian</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Yen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this work, we present a new language pre-training model TNT (Text Normalization based pre-training of Transformers) for content moderation. Inspired by the masking strategy and text normalization, TNT is developed to learn language representation by training transformers to reconstruct text from four operation types typically seen in text manipulation: substitution, transposition, deletion, and insertion. Furthermore, the normalization involves the prediction of both operation types and token labels, enabling TNT to learn from more challenging tasks than the standard task of masked word recovery. As a result, the experiments demonstrate that TNT outperforms strong baselines on the hate speech classification task. Additional text normalization experiments and case studies show that TNT is a new potential approach to misspelling correction.</abstract>
<identifier type="citekey">tan-etal-2020-tnt</identifier>
<identifier type="doi">10.18653/v1/2020.emnlp-main.383</identifier>
<location>
<url>https://aclanthology.org/2020.emnlp-main.383</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>4735</start>
<end>4741</end>
</extent>
</part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T TNT: Text Normalization based Pre-training of Transformers for Content Moderation
%A Tan, Fei
%A Hu, Yifan
%A Hu, Changwei
%A Li, Keqian
%A Yen, Kevin
%S Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)
%D 2020
%8 nov
%I Association for Computational Linguistics
%C Online
%F tan-etal-2020-tnt
%X In this work, we present a new language pre-training model TNT (Text Normalization based pre-training of Transformers) for content moderation. Inspired by the masking strategy and text normalization, TNT is developed to learn language representation by training transformers to reconstruct text from four operation types typically seen in text manipulation: substitution, transposition, deletion, and insertion. Furthermore, the normalization involves the prediction of both operation types and token labels, enabling TNT to learn from more challenging tasks than the standard task of masked word recovery. As a result, the experiments demonstrate that TNT outperforms strong baselines on the hate speech classification task. Additional text normalization experiments and case studies show that TNT is a new potential approach to misspelling correction.
%R 10.18653/v1/2020.emnlp-main.383
%U https://aclanthology.org/2020.emnlp-main.383
%U https://doi.org/10.18653/v1/2020.emnlp-main.383
%P 4735-4741
Markdown (Informal)
[TNT: Text Normalization based Pre-training of Transformers for Content Moderation](https://aclanthology.org/2020.emnlp-main.383) (Tan et al., EMNLP 2020)
ACL
Fei Tan, Yifan Hu, Changwei Hu, Keqian Li, and Kevin Yen. 2020. TNT: Text Normalization based Pre-training of Transformers for Content Moderation. In Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pages 4735–4741, Online. Association for Computational Linguistics.
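
The abstract describes pre-training on text corrupted by four operation types: substitution, transposition, deletion, and insertion, with the model predicting both the operation type and the original token. As a rough character-level sketch of how such corrupted training pairs could be generated (a hypothetical Python illustration, not the authors' implementation; the function and all names in it are invented for this example):

import random
import string

# Hypothetical sketch (not the authors' code): corrupt tokens with the four
# operation types named in the abstract, recording (operation, original token)
# labels so a model can be trained to predict both during reconstruction.
def corrupt(tokens, p=0.15, seed=0):
    rng = random.Random(seed)
    out, labels = [], []
    for tok in tokens:
        if rng.random() >= p or len(tok) < 2:
            out.append(tok)
            labels.append(("keep", tok))
            continue
        op = rng.choice(["substitution", "transposition", "deletion", "insertion"])
        j = rng.randrange(len(tok) - 1)  # position to manipulate
        if op == "substitution":       # replace one character with a random letter
            out.append(tok[:j] + rng.choice(string.ascii_lowercase) + tok[j + 1:])
        elif op == "transposition":    # swap two adjacent characters
            out.append(tok[:j] + tok[j + 1] + tok[j] + tok[j + 2:])
        elif op == "deletion":         # drop one character
            out.append(tok[:j] + tok[j + 1:])
        else:                          # insertion: add a random letter
            out.append(tok[:j] + rng.choice(string.ascii_lowercase) + tok[j:])
        labels.append((op, tok))
    return out, labels

corrupted, labels = corrupt("please stop posting hateful comments".split(), p=0.5)
print(corrupted)
print(labels)

A transformer trained to map the corrupted tokens back to the originals while classifying each operation label would face the harder-than-masking objective the abstract claims, since it must detect what was changed as well as recover the clean text.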