% CoNLL 2018 paper, ACL Anthology ID K18-1031.
% The url field uses the canonical Anthology permalink rather than a
% preview/staging link, so it stays resolvable alongside the DOI.
@inproceedings{kann-etal-2018-sentence,
  title     = {Sentence-Level Fluency Evaluation: References Help, But Can Be Spared!},
  author    = {Kann, Katharina and Rothe, Sascha and Filippova, Katja},
  editor    = {Korhonen, Anna and Titov, Ivan},
  booktitle = {Proceedings of the 22nd Conference on Computational Natural Language Learning},
  month     = oct,
  year      = {2018},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/K18-1031/},
  doi       = {10.18653/v1/K18-1031},
  pages     = {313--323},
  abstract  = {Motivated by recent findings on the probabilistic modeling of acceptability judgments, we propose syntactic log-odds ratio (SLOR), a normalized language model score, as a metric for referenceless fluency evaluation of natural language generation output at the sentence level. We further introduce WPSLOR, a novel WordPiece-based version, which harnesses a more compact language model. Even though word-overlap metrics like ROUGE are computed with the help of hand-written references, our referenceless methods obtain a significantly higher correlation with human fluency scores on a benchmark dataset of compressed sentences. Finally, we present ROUGE-LM, a reference-based metric which is a natural extension of WPSLOR to the case of available references. We show that ROUGE-LM yields a significantly higher correlation with human judgments than all baseline metrics, including WPSLOR on its own.},
}
Markdown (Informal)
[Sentence-Level Fluency Evaluation: References Help, But Can Be Spared!](https://aclanthology.org/K18-1031/) (Kann et al., CoNLL 2018)
ACL