% Conference paper (LREC 2026, main volume), pp. 1110--1116.
% NOTE(review): booktitle carries the venue name as exported; confirm the
% official proceedings title once the volume is published.
% NOTE(review): url points at a preview/ingest host; confirm the final
% permanent URL before relying on it.
@inproceedings{maekawa-etal-2026-parallel,
  author           = {Maekawa, Daisuke and
                      Kajiwara, Tomoyuki and
                      Ninomiya, Takashi},
  title            = {Parallel Corpus Filtering Based on Semantic Similarity and Surface Dissimilarity for {Japanese} Text Simplification with {LLM}s},
  editor           = {Piperidis, Stelios and
                      Bel, N{\'u}ria and
                      van den Heuvel, Henk and
                      Ide, Nancy and
                      Krek, Simon and
                      Toral, Antonio},
  booktitle        = {International Conference on Language Resources and Evaluation},
  month            = may,
  year             = 2026,
  address          = {Palma de Mallorca, Spain},
  publisher        = {ELRA Language Resource Association},
  pages            = {1110--1116},
  url              = {https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.86/},
  anthology-volume = {main},
  abstract         = {We are focusing on low-cost fine-tuning for large language models (LLMs) in Japanese text simplification. LLMs have achieved high performance even with fine-tuning on small parallel corpora in tasks such as machine translation and dialogue response generation. In this study, we propose a method of parallel corpus filtering for text simplification and investigate how much the number of sentence pairs for fine-tuning LLMs can be reduced. Experimental results on Japanese corpora in three domains revealed that the ability to perform text simplification tasks can be acquired even from a very small corpus of 16 to 64 sentence pairs. Although more parallel corpora are needed to acquire domain knowledge, our method outperformed full fine-tuning while reducing the training corpus by approximately 70{\%}.},
}
Markdown (Informal)
[Parallel Corpus Filtering Based on Semantic Similarity and Surface Dissimilarity for Japanese Text Simplification with LLMs](https://preview.aclanthology.org/ingest-lrec/2026.lrec-main.86/) (Maekawa et al., LREC 2026)
ACL