Although scaling up language model size has reliably improved performance on
a range of NLP tasks, even the largest models currently struggle with certain
reasoning tasks such as math word problems, symbolic manipulation, and
commonsense reasoning. This paper explores the ability of language models to
generate a coherent chain of thought -- a series of short sentences that mimic
the reasoning process a person might have when responding to a question.
Experiments show that inducing a chain of thought via prompting can enable
sufficiently large language models to better perform reasoning tasks that
otherwise have flat scaling curves. When combined with the 540B parameter PaLM
model, chain of thought prompting achieves new state of the art of 58.1\% on
the GSM8K benchmark of math word problems.
Description
Chain of Thought Prompting Elicits Reasoning in Large Language Models
%0 Generic
%1 wei2022chain
%A Wei, Jason
%A Wang, Xuezhi
%A Schuurmans, Dale
%A Bosma, Maarten
%A Chi, Ed
%A Le, Quoc
%A Zhou, Denny
%D 2022
%K google language large model network neural nlp nn prompting reasoning
%T Chain of Thought Prompting Elicits Reasoning in Large Language Models
%U http://arxiv.org/abs/2201.11903
%X Although scaling up language model size has reliably improved performance on
a range of NLP tasks, even the largest models currently struggle with certain
reasoning tasks such as math word problems, symbolic manipulation, and
commonsense reasoning. This paper explores the ability of language models to
generate a coherent chain of thought -- a series of short sentences that mimic
the reasoning process a person might have when responding to a question.
Experiments show that inducing a chain of thought via prompting can enable
sufficiently large language models to better perform reasoning tasks that
otherwise have flat scaling curves. When combined with the 540B parameter PaLM
model, chain of thought prompting achieves new state of the art of 58.1\% on
the GSM8K benchmark of math word problems.
@misc{wei2022chain,
  abstract      = {Although scaling up language model size has reliably improved performance on
a range of NLP tasks, even the largest models currently struggle with certain
reasoning tasks such as math word problems, symbolic manipulation, and
commonsense reasoning. This paper explores the ability of language models to
generate a coherent chain of thought -- a series of short sentences that mimic
the reasoning process a person might have when responding to a question.
Experiments show that inducing a chain of thought via prompting can enable
sufficiently large language models to better perform reasoning tasks that
otherwise have flat scaling curves. When combined with the 540B parameter PaLM
model, chain of thought prompting achieves new state of the art of 58.1\% on
the GSM8K benchmark of math word problems.},
  added-at      = {2022-04-07T16:43:58.000+0200},
  archiveprefix = {arXiv},
  author        = {Wei, Jason and Wang, Xuezhi and Schuurmans, Dale and Bosma, Maarten and Chi, Ed and Le, Quoc and Zhou, Denny},
  biburl        = {https://www.bibsonomy.org/bibtex/289ac94bc2fca7137a21c01d8df31d98e/becker},
  description   = {Chain of Thought Prompting Elicits Reasoning in Large Language Models},
  eprint        = {2201.11903},
  interhash     = {2ad9e5db9ae35db75d709f0f38695c6b},
  intrahash     = {89ac94bc2fca7137a21c01d8df31d98e},
  keywords      = {google language large model network neural nlp nn prompting reasoning},
  timestamp     = {2022-04-23T21:12:03.000+0200},
  title         = {Chain of Thought Prompting Elicits Reasoning in Large Language Models},
  url           = {https://arxiv.org/abs/2201.11903},
  year          = {2022}
}