In the Bayesian approach to sequential decision making, exact calculation of
the (subjective) utility is intractable. This extends to most special cases of
interest, such as reinforcement learning problems. While utility bounds are
known to exist for this problem, so far none of them has been particularly tight.
In this paper, we show how to efficiently calculate a lower bound, which
corresponds to the utility of a near-optimal memoryless policy for the decision
problem. This policy is generally different from both the Bayes-optimal policy
and the policy that is optimal for the expected MDP under the current belief. We
then show how these bounds can be applied to obtain robust exploration policies
in a Bayesian reinforcement learning setting.
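
For illustration only (this is not the construction from the paper), the sketch below shows the general principle such lower bounds rest on: the expected utility of any fixed memoryless policy under the current belief is a lower bound on the Bayes-optimal utility. Here the belief is approximated by a finite set of MDPs sampled from the posterior, and the fixed policy is the one that is optimal for the expected MDP under the belief. All names (policy_value, optimal_policy, mean_mdp_lower_bound) are hypothetical.

import numpy as np

def policy_value(P, R, pi, gamma):
    """Exact value of deterministic policy pi in the MDP (P, R), gamma < 1."""
    n_states = P.shape[1]
    P_pi = P[pi, np.arange(n_states)]   # transition matrix under pi
    r_pi = R[pi, np.arange(n_states)]   # reward vector under pi
    return np.linalg.solve(np.eye(n_states) - gamma * P_pi, r_pi)

def optimal_policy(P, R, gamma, iters=1000):
    """Greedy policy of a single MDP, obtained by value iteration."""
    n_states = P.shape[1]
    V = np.zeros(n_states)
    for _ in range(iters):
        Q = R + gamma * P @ V           # shape: (n_actions, n_states)
        V = Q.max(axis=0)
    return Q.argmax(axis=0)

def mean_mdp_lower_bound(P_samples, R_samples, gamma):
    """Average value of the expected-MDP policy over posterior samples.

    Because the Bayes-optimal policy is at least as good as any fixed
    policy, this average lower-bounds the Bayes-optimal utility
    (here evaluated under a uniform initial state distribution)."""
    P_bar, R_bar = P_samples.mean(axis=0), R_samples.mean(axis=0)
    pi = optimal_policy(P_bar, R_bar, gamma)
    values = [policy_value(P, R, pi, gamma).mean()
              for P, R in zip(P_samples, R_samples)]
    return np.mean(values)

Replacing the expected-MDP policy above with a near-optimal memoryless policy for the belief, as the paper proposes, yields the same kind of bound but a tighter one.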
@misc{dimitrakakis2011robust,
author = {Dimitrakakis, Christos},
note = {arXiv:1106.3651. Corrected version; 12 pages, 3 figures, 1 table},
title = {Robust Bayesian reinforcement learning through tight lower bounds},
url = {http://arxiv.org/abs/1106.3651},
year = 2011
}