Policy-gradient-based actor-critic algorithms are among the most popular algorithms in the reinforcement learning framework. Because they can search for optimal policies using low-variance gradient estimates, they have proved useful in several real-life applications, such as robotics, power control, and finance. Although general surveys on reinforcement learning techniques already exist, none is dedicated specifically to actor-critic algorithms. This paper therefore describes the state of the art of actor-critic algorithms, focusing on methods that can work in an online setting and that use function approximation to handle continuous state and action spaces. After discussing the concepts of reinforcement learning and the origins of actor-critic algorithms, the paper describes the workings of the natural gradient, which has made its way into many actor-critic algorithms over the past few years. It then reviews several standard and natural actor-critic algorithms and concludes with an overview of application areas and a discussion of open issues.
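For context, the two gradient notions the survey contrasts are commonly written as follows. This is the standard textbook formulation (the notation is assumed here, not quoted from the paper): J(theta) is the expected return of the parameterized policy pi_theta, and F(theta) is the Fisher information matrix of the policy distribution.

% Standard policy gradient versus natural policy gradient, as commonly
% defined in the actor-critic literature this survey covers (notation
% assumed, not taken verbatim from the paper).
\begin{align*}
  \nabla_\theta J(\theta)
    &= \mathbb{E}_{s,a \sim \pi_\theta}\!\left[ \nabla_\theta \log \pi_\theta(a \mid s)\, Q^{\pi_\theta}(s,a) \right] \\
  F(\theta)
    &= \mathbb{E}_{s,a \sim \pi_\theta}\!\left[ \nabla_\theta \log \pi_\theta(a \mid s)\, \nabla_\theta \log \pi_\theta(a \mid s)^{\top} \right] \\
  \tilde{\nabla}_\theta J(\theta)
    &= F(\theta)^{-1}\, \nabla_\theta J(\theta)
\end{align*}

The natural gradient is steepest ascent under the Fisher metric rather than the Euclidean metric on the parameters, which makes the update invariant to the particular parameterization of the policy.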
%0 Journal Article
%1 grondman2012ActorCriticSurvey
%A Grondman, I.
%A Busoniu, L.
%A Lopes, G. A. D.
%A Babuska, R.
%D 2012
%J IEEE Transactions on Systems, Man, and Cybernetics, Part C (Applications and Reviews)
%K actor_critic final reinforcement survey thema:reinforcement_learning_recommender
%N 6
%P 1291-1307
%R 10.1109/TSMCC.2012.2218595
%T A Survey of Actor-Critic Reinforcement Learning: Standard and Natural Policy Gradients
%V 42
@article{grondman2012ActorCriticSurvey,
author = {Grondman, I. and Busoniu, L. and Lopes, G. A. D. and Babuska, R.},
doi = {10.1109/TSMCC.2012.2218595},
issn = {1094-6977},
journal = {IEEE Transactions on Systems, Man, and Cybernetics, Part C (Applications and Reviews)},
keywords = {actor_critic final reinforcement survey thema:reinforcement_learning_recommender},
month = nov,
number = 6,
pages = {1291--1307},
title = {A Survey of Actor-Critic Reinforcement Learning: Standard and Natural Policy Gradients},
volume = 42,
year = 2012
}