This paper presents an actor-critic deep reinforcement learning agent with experience replay that is stable, sample efficient, and performs remarkably well on challenging environments, including the discrete 57-game Atari domain and several continuous control problems. To achieve this, the paper introduces several innovations, including truncated importance sampling with bias correction, stochastic dueling network architectures, and a new trust region policy optimization method.
%0 Journal Article
%1 wang2016acer
%A Wang, Ziyu
%A Bapst, Victor
%A Heess, Nicolas
%A Mnih, Volodymyr
%A Munos, Rémi
%A Kavukcuoglu, Koray
%A de Freitas, Nando
%D 2016
%J CoRR
%K DRLAlgoComparison acer reinforcement_learning
%T Sample Efficient Actor-Critic with Experience Replay
%U http://dblp.uni-trier.de/db/journals/corr/corr1611.html#WangBHMMKF16
%V abs/1611.01224
%X This paper presents an actor-critic deep reinforcement learning agent with experience replay that is stable, sample efficient, and performs remarkably well on challenging environments, including the discrete 57-game Atari domain and several continuous control problems. To achieve this, the paper introduces several innovations, including truncated importance sampling with bias correction, stochastic dueling network architectures, and a new trust region policy optimization method.
@article{wang2016acer,
  abstract      = {This paper presents an actor-critic deep reinforcement learning agent with experience replay that is stable, sample efficient, and performs remarkably well on challenging environments, including the discrete 57-game Atari domain and several continuous control problems. To achieve this, the paper introduces several innovations, including truncated importance sampling with bias correction, stochastic dueling network architectures, and a new trust region policy optimization method.},
  added-at      = {2019-12-16T18:28:00.000+0100},
  archiveprefix = {arXiv},
  author        = {Wang, Ziyu and Bapst, Victor and Heess, Nicolas and Mnih, Volodymyr and Munos, R{\'e}mi and Kavukcuoglu, Koray and de Freitas, Nando},
  biburl        = {https://www.bibsonomy.org/bibtex/2f6fb87ed0695c5aa692d83fc7cab7794/lanteunis},
  ee            = {http://arxiv.org/abs/1611.01224},
  eprint        = {1611.01224},
  interhash     = {ce284493dd67acc99e0c89e834a7161c},
  intrahash     = {f6fb87ed0695c5aa692d83fc7cab7794},
  journal       = {CoRR},
  keywords      = {DRLAlgoComparison acer reinforcement_learning},
  timestamp     = {2019-12-16T21:10:37.000+0100},
  title         = {Sample Efficient Actor-Critic with Experience Replay},
  url           = {http://dblp.uni-trier.de/db/journals/corr/corr1611.html#WangBHMMKF16},
  volume        = {abs/1611.01224},
  year          = {2016}
}