Optimal solutions to Markov decision problems may be very sensitive
with respect to the state transition probabilities. In many practical
problems, the estimation of these probabilities is far from accurate.
Hence, estimation errors are limiting factors in applying Markov
decision processes to real-world problems. We consider a robust control
problem for a finite-state, finite-action Markov decision process,
where uncertainty on the transition matrices is described in terms
of possibly nonconvex sets. We show that perfect duality holds for
this problem, and that as a consequence, it can be solved with a
variant of the classical dynamic programming algorithm, the "robust
dynamic programming" algorithm. We show that a particular choice
of the uncertainty sets, involving likelihood regions or entropy
bounds, leads to both a statistically accurate representation of
uncertainty, and a complexity of the robust recursion that is almost
the same as that of the classical recursion. Hence, robustness can
be added at practically no extra computing cost. We derive similar
results for other uncertainty sets, including one with a finite number
of possible values for the transition matrices. We describe in a
practical path planning example the benefits of using a robust strategy
instead of the classical optimal strategy; even if the uncertainty
level is only crudely guessed, the robust strategy yields a much
better worst-case expected travel time.
%0 Journal Article
%1 Nilim:2005
%A Nilim, Arnab
%A El Ghaoui, Laurent
%D 2005
%J Operations Research
%K *ALGORITHMS *DECISION *MARKOV *PROBABILITIES Markov convex dynamic estimation finite making processes programming robustness state statistics uncertainty
%P 780-798
%T Robust Control of Markov Decision Processes with Uncertain Transition
Matrices
%V 53
%X Optimal solutions to Markov decision problems may be very sensitive
with respect to the state transition probabilities. In many practical
problems, the estimation of these probabilities is far from accurate.
Hence, estimation errors are limiting factors in applying Markov
decision processes to real-world problems. We consider a robust control
problem for a finite-state, finite-action Markov decision process,
where uncertainty on the transition matrices is described in terms
of possibly nonconvex sets. We show that perfect duality holds for
this problem, and that as a consequence, it can be solved with a
variant of the classical dynamic programming algorithm, the "robust
dynamic programming" algorithm. We show that a particular choice
of the uncertainty sets, involving likelihood regions or entropy
bounds, leads to both a statistically accurate representation of
uncertainty, and a complexity of the robust recursion that is almost
the same as that of the classical recursion. Hence, robustness can
be added at practically no extra computing cost. We derive similar
results for other uncertainty sets, including one with a finite number
of possible values for the transition matrices. We describe in a
practical path planning example the benefits of using a robust strategy
instead of the classical optimal strategy; even if the uncertainty
level is only crudely guessed, the robust strategy yields a much
better worst-case expected travel time.
@article{Nilim:2005,
  abstract    = {Optimal solutions to Markov decision problems may be very sensitive
with respect to the state transition probabilities. In many practical
problems, the estimation of these probabilities is far from accurate.
Hence, estimation errors are limiting factors in applying Markov
decision processes to real-world problems. We consider a robust control
problem for a finite-state, finite-action Markov decision process,
where uncertainty on the transition matrices is described in terms
of possibly nonconvex sets. We show that perfect duality holds for
this problem, and that as a consequence, it can be solved with a
variant of the classical dynamic programming algorithm, the "robust
dynamic programming" algorithm. We show that a particular choice
of the uncertainty sets, involving likelihood regions or entropy
bounds, leads to both a statistically accurate representation of
uncertainty, and a complexity of the robust recursion that is almost
the same as that of the classical recursion. Hence, robustness can
be added at practically no extra computing cost. We derive similar
results for other uncertainty sets, including one with a finite number
of possible values for the transition matrices. We describe in a
practical path planning example the benefits of using a robust strategy
instead of the classical optimal strategy; even if the uncertainty
level is only crudely guessed, the robust strategy yields a much
better worst-case expected travel time.},
  added-at    = {2009-06-26T15:25:19.000+0200},
  author      = {Nilim, Arnab and {El Ghaoui}, Laurent},
  biburl      = {https://www.bibsonomy.org/bibtex/29f0a359332f4138d93bdec5f83d9338d/butz},
  description = {diverse cognitive systems bib},
  interhash   = {3f2cdcf01be2bbfcf1cad64a11a3656c},
  intrahash   = {9f0a359332f4138d93bdec5f83d9338d},
  journal     = {Operations Research},
  keywords    = {*ALGORITHMS *DECISION *MARKOV *PROBABILITIES Markov convex dynamic estimation finite making processes programming robustness state statistics uncertainty},
  owner       = {butz},
  pages       = {780--798},
  timestamp   = {2009-06-26T15:25:49.000+0200},
  title       = {Robust Control of {Markov} Decision Processes with Uncertain Transition
Matrices},
  volume      = {53},
  year        = {2005}
}