% Sun and Nenkova, EMNLP-IJCNLP 2019 (ACL Anthology ID D19-1116).
% The url field uses the permanent ACL Anthology address, not a preview-branch link.
@inproceedings{sun-nenkova-2019-feasibility,
  title     = {The Feasibility of Embedding Based Automatic Evaluation for Single Document Summarization},
  author    = {Sun, Simeng and
               Nenkova, Ani},
  editor    = {Inui, Kentaro and
               Jiang, Jing and
               Ng, Vincent and
               Wan, Xiaojun},
  booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing ({EMNLP-IJCNLP})},
  month     = nov,
  year      = {2019},
  address   = {Hong Kong, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/D19-1116/},
  doi       = {10.18653/v1/D19-1116},
  pages     = {1216--1221},
  abstract  = {ROUGE is widely used to automatically evaluate summarization systems. However, ROUGE measures semantic overlap between a system summary and a human reference on word-string level, much at odds with the contemporary treatment of semantic meaning. Here we present a suite of experiments on using distributed representations for evaluating summarizers, both in reference-based and in reference-free setting. Our experimental results show that the max value over each dimension of the summary ELMo word embeddings is a good representation that results in high correlation with human ratings. Averaging the cosine similarity of all encoders we tested yields high correlation with manual scores in reference-free setting. The distributed representations outperform ROUGE in recent corpora for abstractive news summarization but are less good on test data used in past evaluations.},
}
Markdown (Informal)
[The Feasibility of Embedding Based Automatic Evaluation for Single Document Summarization](https://aclanthology.org/D19-1116/) (Sun & Nenkova, EMNLP-IJCNLP 2019)
ACL