@comment{Sung and Shin, BEA 2023 workshop paper introducing the Korean-Learner-Morpheme (KLM) corpus. The url field holds the permanent ACL Anthology address, not a preview-branch mirror.}
@inproceedings{sung-shin-2023-towards,
  title     = {Towards {L2}-friendly pipelines for learner corpora: A case of written production by {L2}-{Korean} learners},
  author    = {Sung, Hakyung and
               Shin, Gyu-Ho},
  editor    = {Kochmar, Ekaterina and
               Burstein, Jill and
               Horbach, Andrea and
               Laarmann-Quante, Ronja and
               Madnani, Nitin and
               Tack, Ana{\"i}s and
               Yaneva, Victoria and
               Yuan, Zheng and
               Zesch, Torsten},
  booktitle = {Proceedings of the 18th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2023)},
  month     = jul,
  year      = {2023},
  address   = {Toronto, Canada},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.bea-1.6/},
  doi       = {10.18653/v1/2023.bea-1.6},
  pages     = {72--82},
  abstract  = {We introduce the Korean-Learner-Morpheme (KLM) corpus, a manually annotated dataset consisting of 129,784 morphemes from second language (L2) learners of Korean, featuring morpheme tokenization and part-of-speech (POS) tagging. We evaluate the performance of four Korean morphological analyzers in tokenization and POS tagging on the L2-Korean corpus. Results highlight the analyzers' reduced performance on L2 data, indicating the limitation of advanced deep-learning models when dealing with L2-Korean corpora. We further show that fine-tuning one of the models with the KLM corpus improves its accuracy of tokenization and POS tagging on L2-Korean dataset.},
}
Markdown (Informal)
[Towards L2-friendly pipelines for learner corpora: A case of written production by L2-Korean learners](https://aclanthology.org/2023.bea-1.6/) (Sung & Shin, BEA 2023)
ACL