In this paper we consider approximate policy-iteration-based reinforcement learning algorithms. To implement a flexible function approximation scheme, we propose the use of non-parametric methods with regularization, which provides a convenient way to control the complexity of the function approximator. We propose two novel regularized policy iteration algorithms by adding L2-regularization to two widely used policy evaluation methods: Bellman residual minimization (BRM) and least-squares temporal difference learning (LSTD). We derive efficient implementations of our algorithms when the approximate value functions belong to a reproducing kernel Hilbert space. We also provide finite-sample performance bounds for our algorithms and show that they achieve optimal rates of convergence under the studied conditions.
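As a quick illustration of the two L2-regularized policy evaluation steps named in the abstract, here is a minimal sketch in their finite-dimensional linear-feature form; the paper itself derives kernelized (RKHS) versions via the representer theorem, and the function names, regularization defaults, and synthetic data below are illustrative assumptions, not the paper's implementation.

import numpy as np

def regularized_lstd(phi, phi_next, rewards, gamma=0.99, lam=1e-2):
    # Empirical LSTD system A w = b, with an L2 (ridge) term lam * I
    # added so the solve stays well-posed even with few samples:
    #   A = (1/n) Phi^T (Phi - gamma Phi'),  b = (1/n) Phi^T r.
    n, d = phi.shape
    A = phi.T @ (phi - gamma * phi_next) / n
    b = phi.T @ rewards / n
    return np.linalg.solve(A + lam * np.eye(d), b)

def regularized_brm(phi, phi_next, rewards, gamma=0.99, lam=1e-2):
    # Naive L2-regularized Bellman residual minimization: ridge regression
    # of rewards on the "residual features" Phi - gamma Phi'. (With
    # stochastic transitions this plain estimator is biased; the paper
    # works with a corrected BRM objective.)
    n, d = phi.shape
    D = phi - gamma * phi_next
    return np.linalg.solve(D.T @ D / n + lam * np.eye(d), D.T @ rewards / n)

# Usage on synthetic data; the value estimate is then V(s) ~ phi(s) @ w.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    phi = rng.normal(size=(500, 10))       # features of visited states
    phi_next = rng.normal(size=(500, 10))  # features of successor states
    rewards = rng.normal(size=500)
    w_lstd = regularized_lstd(phi, phi_next, rewards)
    w_brm = regularized_brm(phi, phi_next, rewards)
    print(w_lstd.shape, w_brm.shape)       # -> (10,) (10,)

The regularization coefficient lam plays the complexity-control role described in the abstract: larger values shrink the solution toward zero, which is what makes the finite-sample analysis and rate-of-convergence results possible.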
%0 Conference Paper
%1 farahmand2008a
%A Farahmand, A.m.
%A Ghavamzadeh, M.
%A Szepesvári, Cs.
%A Mannor, S.
%B NIPS
%D 2008
%K function approximation, policy iteration, reinforcement learning, nonparametrics, regularization, theory
%P 441--448
%T Regularized Policy Iteration
%X In this paper we consider approximate policy-iteration-based reinforcement learning algorithms. To implement a flexible function approximation scheme, we propose the use of non-parametric methods with regularization, which provides a convenient way to control the complexity of the function approximator. We propose two novel regularized policy iteration algorithms by adding L2-regularization to two widely used policy evaluation methods: Bellman residual minimization (BRM) and least-squares temporal difference learning (LSTD). We derive efficient implementations of our algorithms when the approximate value functions belong to a reproducing kernel Hilbert space. We also provide finite-sample performance bounds for our algorithms and show that they achieve optimal rates of convergence under the studied conditions.
@inproceedings{farahmand2008a,
abstract = {In this paper we consider approximate policy-iteration-based reinforcement learning algorithms. To implement a flexible function approximation scheme, we propose the use of non-parametric methods with regularization, which provides a convenient way to control the complexity of the function approximator. We propose two novel regularized policy iteration algorithms by adding L2-regularization to two widely used policy evaluation methods: Bellman residual minimization (BRM) and least-squares temporal difference learning (LSTD). We derive efficient implementations of our algorithms when the approximate value functions belong to a reproducing kernel Hilbert space. We also provide finite-sample performance bounds for our algorithms and show that they achieve optimal rates of convergence under the studied conditions.},
author = {Farahmand, A.{m}. and Ghavamzadeh, M. and Szepesv{\'a}ri, {Cs}. and Mannor, S.},
bibsource = {DBLP, http://dblp.uni-trier.de},
biburl = {https://www.bibsonomy.org/bibtex/2934b7ad4e5ff02c2f1ffd5ce4d87f3ba/csaba},
booktitle = {NIPS},
crossref = {NIPS21},
ee = {http://books.nips.cc/papers/files/nips21/NIPS2008_0871.pdf},
keywords = {function approximation, policy iteration, reinforcement learning, nonparametrics, regularization, theory},
pages = {441--448},
pdf = {papers/nips08-regrl.pdf},
title = {Regularized Policy Iteration},
year = 2008
}