In recent years deep reinforcement learning (RL) systems have attained
superhuman performance in a number of challenging task domains. However, a
major limitation of such applications is their demand for massive amounts of
training data. A critical present objective is thus to develop deep RL methods
that can adapt rapidly to new tasks. In the present work we introduce a novel
approach to this challenge, which we refer to as deep meta-reinforcement
learning. Previous work has shown that recurrent networks can support
meta-learning in a fully supervised context. We extend this approach to the RL
setting. What emerges is a system that is trained using one RL algorithm, but
whose recurrent dynamics implement a second, quite separate RL procedure. This
second, learned RL algorithm can differ from the original one in arbitrary
ways. Importantly, because it is learned, it is configured to exploit structure
in the training domain. We unpack these points in a series of seven
proof-of-concept experiments, each of which examines a key aspect of deep
meta-RL. We consider prospects for extending and scaling up the approach, and
also point out some potentially important implications for neuroscience.
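
Below is a minimal sketch (not the authors' released code) of the setup the abstract describes: an LSTM policy that receives its previous action and reward as inputs and is trained by an "outer" policy-gradient algorithm across many sampled bandit tasks, so that the recurrent dynamics can implement an "inner", learned adaptation procedure within each episode. The task distribution, network sizes, and the plain REINFORCE-style update are illustrative assumptions; the paper itself trains with an advantage actor-critic method.

import torch
import torch.nn as nn
from torch.distributions import Categorical

N_ARMS, HIDDEN, EPISODE_LEN = 2, 48, 100

class MetaRLAgent(nn.Module):
    def __init__(self):
        super().__init__()
        # Input: one-hot of the previous action plus the previous reward
        # (a bandit task provides no other observation).
        self.lstm = nn.LSTMCell(N_ARMS + 1, HIDDEN)
        self.policy = nn.Linear(HIDDEN, N_ARMS)

    def forward(self, prev_action, prev_reward, state):
        x = torch.cat([prev_action, prev_reward], dim=-1)
        h, c = self.lstm(x, state)
        return Categorical(logits=self.policy(h)), (h, c)

agent = MetaRLAgent()
optimizer = torch.optim.Adam(agent.parameters(), lr=1e-3)

for episode in range(2000):
    # "Inner loop": a fresh bandit task whose arm probabilities are hidden;
    # the agent must explore and exploit within the episode using its memory.
    arm_probs = torch.rand(N_ARMS)
    state = (torch.zeros(1, HIDDEN), torch.zeros(1, HIDDEN))
    prev_action = torch.zeros(1, N_ARMS)
    prev_reward = torch.zeros(1, 1)
    log_probs, rewards = [], []
    for t in range(EPISODE_LEN):
        dist, state = agent(prev_action, prev_reward, state)
        action = dist.sample()
        reward = torch.bernoulli(arm_probs[action])
        log_probs.append(dist.log_prob(action))
        rewards.append(reward)
        prev_action = nn.functional.one_hot(action, N_ARMS).float()
        prev_reward = reward.view(1, 1)
    # "Outer loop": update the weights with plain REINFORCE on the episode
    # return (a simplified stand-in for the paper's actor-critic training).
    episode_return = torch.stack(rewards).sum()
    loss = -torch.stack(log_probs).sum() * (episode_return - 0.5 * EPISODE_LEN)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
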
@article{wang2016learning,
author = {Wang, Jane X and Kurth-Nelson, Zeb and Tirumala, Dhruva and Soyer, Hubert and Leibo, Joel Z and Munos, Remi and Blundell, Charles and Kumaran, Dharshan and Botvinick, Matt},
journal = {CoRR},
keywords = {ReinforcementLearning},
note = {arXiv:1611.05763. 17 pages, 7 figures, 1 table},
title = {Learning to reinforcement learn},
url = {http://arxiv.org/abs/1611.05763},
volume = {abs/1611.05763},
year = 2016
}