@comment{Conference paper record (Findings of EMNLP 2025). The url field uses the canonical ACL Anthology address, whose identifier matches the DOI suffix, rather than a temporary preview-branch build.}
@inproceedings{higashiyama-utiyama-2025-comprehensive,
    title     = "Comprehensive Evaluation on Lexical Normalization: Boundary-Aware Approaches for Unsegmented Languages",
    author    = "Higashiyama, Shohei and
                 Utiyama, Masao",
    editor    = "Christodoulopoulos, Christos and
                 Chakraborty, Tanmoy and
                 Rose, Carolyn and
                 Peng, Violet",
    booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
    month     = nov,
    year      = "2025",
    address   = "Suzhou, China",
    publisher = "Association for Computational Linguistics",
    url       = "https://aclanthology.org/2025.findings-emnlp.684/",
    doi       = "10.18653/v1/2025.findings-emnlp.684",
    pages     = "12774--12799",
    isbn      = "979-8-89176-335-7",
    abstract  = "Lexical normalization research has sought to tackle the challenge of processing informal expressions in user-generated text, yet the absence of comprehensive evaluations leaves it unclear which methods excel across multiple perspectives. Focusing on unsegmented languages, we make three key contributions: (1) creating a large-scale, multi-domain Japanese normalization dataset, (2) developing normalization methods based on state-of-the-art pre-trained models, and (3) conducting experiments across multiple evaluation perspectives. Our experiments show that both encoder-only and decoder-only approaches achieve promising results in both accuracy and efficiency."
}
Markdown (Informal)
[Comprehensive Evaluation on Lexical Normalization: Boundary-Aware Approaches for Unsegmented Languages](https://aclanthology.org/2025.findings-emnlp.684/) (Higashiyama & Utiyama, Findings 2025)
ACL