We introduce a framework that abstracts Reinforcement Learning (RL) as a
sequence modeling problem. This allows us to draw upon the simplicity and
scalability of the Transformer architecture, and associated advances in
language modeling such as GPT-x and BERT. In particular, we present Decision
Transformer, an architecture that casts the problem of RL as conditional
sequence modeling. Unlike prior approaches to RL that fit value functions or
compute policy gradients, Decision Transformer simply outputs the optimal
actions by leveraging a causally masked Transformer. By conditioning an
autoregressive model on the desired return (reward), past states, and actions,
our Decision Transformer model can generate future actions that achieve the
desired return. Despite its simplicity, Decision Transformer matches or exceeds
the performance of state-of-the-art model-free offline RL baselines on Atari,
OpenAI Gym, and Key-to-Door tasks.
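
The abstract above describes the core mechanism: each trajectory is modeled as a sequence of (return-to-go, state, action) tokens, and a causally masked Transformer is trained to predict actions. The sketch below is a minimal, illustrative rendering of that idea, not the authors' released implementation; the module sizes, the use of PyTorch's nn.TransformerEncoder, and the omission of timestep embeddings are simplifying assumptions.

# Minimal sketch (assumption-laden, not the authors' code) of return-conditioned
# sequence modeling: per timestep, tokens for return-to-go, state, and action are
# interleaved and passed through a causally masked Transformer that predicts the
# action at each state-token position.
import torch
import torch.nn as nn


class DecisionTransformerSketch(nn.Module):
    def __init__(self, state_dim, act_dim, d_model=128, n_heads=4, n_layers=3):
        super().__init__()
        # Separate linear embeddings per modality (hypothetical sizes).
        self.embed_return = nn.Linear(1, d_model)
        self.embed_state = nn.Linear(state_dim, d_model)
        self.embed_action = nn.Linear(act_dim, d_model)
        layer = nn.TransformerEncoderLayer(d_model, n_heads, batch_first=True)
        self.encoder = nn.TransformerEncoder(layer, n_layers)
        self.predict_action = nn.Linear(d_model, act_dim)

    def forward(self, returns_to_go, states, actions):
        # returns_to_go: (B, T, 1), states: (B, T, state_dim), actions: (B, T, act_dim)
        B, T, _ = states.shape
        tokens = torch.stack(
            (self.embed_return(returns_to_go),
             self.embed_state(states),
             self.embed_action(actions)),
            dim=2,
        ).reshape(B, 3 * T, -1)  # interleave (R_t, s_t, a_t) along the sequence axis
        # Causal mask: position i may not attend to positions > i.
        causal = torch.triu(torch.full((3 * T, 3 * T), float("-inf")), diagonal=1)
        hidden = self.encoder(tokens, mask=causal)
        # Read out the predicted action at each state-token position.
        return self.predict_action(hidden[:, 1::3])


if __name__ == "__main__":
    model = DecisionTransformerSketch(state_dim=17, act_dim=6)
    rtg = torch.randn(2, 10, 1)
    s = torch.randn(2, 10, 17)
    a = torch.randn(2, 10, 6)
    print(model(rtg, s, a).shape)  # torch.Size([2, 10, 6])

At evaluation time, one would seed the sequence with the desired target return and the initial state, then append each generated action and observed state while decrementing the return-to-go by the reward received, feeding the model autoregressively.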
@misc{chen2021decision,
author = {Chen, Lili and Lu, Kevin and Rajeswaran, Aravind and Lee, Kimin and Grover, Aditya and Laskin, Michael and Abbeel, Pieter and Srinivas, Aravind and Mordatch, Igor},
keywords = {decision learning modelling reinforcement sequence transformer},
note = {arXiv:2106.01345. First two authors contributed equally; last two authors advised equally},
title = {Decision Transformer: Reinforcement Learning via Sequence Modeling},
url = {http://arxiv.org/abs/2106.01345},
year = 2021
}