In reinforcement learning, we can learn a model of future observations and
rewards, and use it to plan the agent's next actions. However, jointly modeling
future observations can be computationally expensive or even intractable if the
observations are high-dimensional (e.g. images). For this reason, previous
works have considered partial models, which model only part of the observation.
In this paper, we show that partial models can be causally incorrect: they are
confounded by the observations they don't model, and can therefore lead to
incorrect planning. To address this, we introduce a general family of partial
models that are provably causally correct, yet remain fast because they do not
need to fully model future observations.
Description
[2002.02836] Causally Correct Partial Models for Reinforcement Learning
%0 Journal Article
%1 rezende2020causally
%A Rezende, Danilo J.
%A Danihelka, Ivo
%A Papamakarios, George
%A Ke, Nan Rosemary
%A Jiang, Ray
%A Weber, Theophane
%A Gregor, Karol
%A Merzic, Hamza
%A Viola, Fabio
%A Wang, Jane
%A Mitrovic, Jovana
%A Besse, Frederic
%A Antonoglou, Ioannis
%A Buesing, Lars
%D 2020
%K causal-analysis reinforcement-learning
%T Causally Correct Partial Models for Reinforcement Learning
%U http://arxiv.org/abs/2002.02836
%X In reinforcement learning, we can learn a model of future observations and
rewards, and use it to plan the agent's next actions. However, jointly modeling
future observations can be computationally expensive or even intractable if the
observations are high-dimensional (e.g. images). For this reason, previous
works have considered partial models, which model only part of the observation.
In this paper, we show that partial models can be causally incorrect: they are
confounded by the observations they don't model, and can therefore lead to
incorrect planning. To address this, we introduce a general family of partial
models that are provably causally correct, yet remain fast because they do not
need to fully model future observations.
@article{rezende2020causally,
  abstract      = {In reinforcement learning, we can learn a model of future observations and
rewards, and use it to plan the agent's next actions. However, jointly modeling
future observations can be computationally expensive or even intractable if the
observations are high-dimensional (e.g. images). For this reason, previous
works have considered partial models, which model only part of the observation.
In this paper, we show that partial models can be causally incorrect: they are
confounded by the observations they don't model, and can therefore lead to
incorrect planning. To address this, we introduce a general family of partial
models that are provably causally correct, yet remain fast because they do not
need to fully model future observations.},
  added-at      = {2020-02-11T12:42:35.000+0100},
  archiveprefix = {arXiv},
  author        = {Rezende, Danilo J. and Danihelka, Ivo and Papamakarios, George and Ke, Nan Rosemary and Jiang, Ray and Weber, Theophane and Gregor, Karol and Merzic, Hamza and Viola, Fabio and Wang, Jane and Mitrovic, Jovana and Besse, Frederic and Antonoglou, Ioannis and Buesing, Lars},
  biburl        = {https://www.bibsonomy.org/bibtex/2a201c3a477d535328cb6c0c877b1b411/kirk86},
  description   = {[2002.02836] Causally Correct Partial Models for Reinforcement Learning},
  eprint        = {2002.02836},
  interhash     = {468b6331a3bcd658dbb0dc7e2c0c573b},
  intrahash     = {a201c3a477d535328cb6c0c877b1b411},
  keywords      = {causal-analysis reinforcement-learning},
  note          = {cite arxiv:2002.02836},
  timestamp     = {2020-02-11T12:42:35.000+0100},
  title         = {Causally Correct Partial Models for Reinforcement Learning},
  url           = {http://arxiv.org/abs/2002.02836},
  year          = 2020
}