Large Language Models (LLMs) have shown promising performance in summary
evaluation tasks, yet they face challenges such as high computational costs and
the Lost-in-the-Middle problem where important information in the middle of
long documents is often overlooked. To address these issues, this paper
introduces a novel approach, Extract-then-Evaluate, which involves extracting
key sentences from a long source document and then evaluating the summary by
prompting LLMs. The results reveal that the proposed method not only
significantly reduces evaluation costs but also exhibits a higher correlation
with human evaluations. Furthermore, we provide practical recommendations for
optimal document length and sentence extraction methods, contributing to the
development of cost-effective yet more accurate methods for LLM-based text
generation evaluation.
Description
Less is More for Long Document Summary Evaluation by LLMs
%0 Journal Article
%1 wu2023document
%A Wu, Yunshu
%A Iso, Hayate
%A Pezeshkpour, Pouya
%A Bhutani, Nikita
%A Hruschka, Estevam
%D 2023
%K llm
%T Less is More for Long Document Summary Evaluation by LLMs
%U http://arxiv.org/abs/2309.07382
%X Large Language Models (LLMs) have shown promising performance in summary
evaluation tasks, yet they face challenges such as high computational costs and
the Lost-in-the-Middle problem where important information in the middle of
long documents is often overlooked. To address these issues, this paper
introduces a novel approach, Extract-then-Evaluate, which involves extracting
key sentences from a long source document and then evaluating the summary by
prompting LLMs. The results reveal that the proposed method not only
significantly reduces evaluation costs but also exhibits a higher correlation
with human evaluations. Furthermore, we provide practical recommendations for
optimal document length and sentence extraction methods, contributing to the
development of cost-effective yet more accurate methods for LLM-based text
generation evaluation.
@misc{wu2023document,
  abstract = {Large Language Models (LLMs) have shown promising performance in summary
evaluation tasks, yet they face challenges such as high computational costs and
the Lost-in-the-Middle problem where important information in the middle of
long documents is often overlooked. To address these issues, this paper
introduces a novel approach, Extract-then-Evaluate, which involves extracting
key sentences from a long source document and then evaluating the summary by
prompting LLMs. The results reveal that the proposed method not only
significantly reduces evaluation costs but also exhibits a higher correlation
with human evaluations. Furthermore, we provide practical recommendations for
optimal document length and sentence extraction methods, contributing to the
development of cost-effective yet more accurate methods for LLM-based text
generation evaluation.},
  added-at = {2023-09-19T14:15:34.000+0200},
  archiveprefix = {arXiv},
  author = {Wu, Yunshu and Iso, Hayate and Pezeshkpour, Pouya and Bhutani, Nikita and Hruschka, Estevam},
  biburl = {https://www.bibsonomy.org/bibtex/26439eb9e2bb565aadabe8aba618e2306/lisa-ee},
  description = {Less is More for Long Document Summary Evaluation by LLMs},
  eprint = {2309.07382},
  interhash = {167a704f047560fe71601f0590e796e4},
  intrahash = {6439eb9e2bb565aadabe8aba618e2306},
  keywords = {llm},
  note = {Work in progress},
  timestamp = {2023-09-19T14:15:34.000+0200},
  title = {Less is More for Long Document Summary Evaluation by {LLMs}},
  url = {http://arxiv.org/abs/2309.07382},
  year = 2023
}