% Vajjala and Banerjee, BEA 2017 workshop paper: system description for the
% NLI Shared Task 2017 (team NLI-ISU). The url field uses the canonical ACL
% Anthology address; the DOI remains the primary persistent identifier.
@inproceedings{vajjala-banerjee-2017-study,
  title     = {A study of {N-gram} and Embedding Representations for Native Language Identification},
  author    = {Vajjala, Sowmya and
               Banerjee, Sagnik},
  editor    = {Tetreault, Joel and
               Burstein, Jill and
               Leacock, Claudia and
               Yannakoudakis, Helen},
  booktitle = {Proceedings of the 12th Workshop on Innovative Use of {NLP} for Building Educational Applications},
  month     = sep,
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/W17-5026/},
  doi       = {10.18653/v1/W17-5026},
  pages     = {240--248},
  abstract  = {We report on our experiments with N-gram and embedding based feature representations for Native Language Identification (NLI) as a part of the NLI Shared Task 2017 (team name: NLI-ISU). Our best performing system on the test set for written essays had a macro F1 of 0.8264 and was based on word uni, bi and trigram features. We explored n-grams covering word, character, POS and word-POS mixed representations for this task. For embedding based feature representations, we employed both word and document embeddings. We had a relatively poor performance with all embedding representations compared to n-grams, which could be because of the fact that embeddings capture semantic similarities whereas L1 differences are more stylistic in nature.},
}
Markdown (Informal)
[A study of N-gram and Embedding Representations for Native Language Identification](https://aclanthology.org/W17-5026/) (Vajjala & Banerjee, BEA 2017)
ACL