From a young age humans learn to use grammatical principles to hierarchically
combine words into sentences. Action grammars is the parallel idea, that there
is an underlying set of rules (a "grammar") that govern how we hierarchically
combine actions to form new, more complex actions. We introduce the Action
Grammar Reinforcement Learning (AG-RL) framework which leverages the concept of
action grammars to consistently improve the sample efficiency of Reinforcement
Learning agents. AG-RL works by using a grammar inference algorithm to infer
the "action grammar" of an agent midway through training. The agent's action
space is then augmented with macro-actions identified by the grammar. We apply
this framework to Double Deep Q-Learning (AG-DDQN) and a discrete action
version of Soft Actor-Critic (AG-SAC) and find that it improves performance in
8 out of 8 tested Atari games (median +31%, max +668%) and 19 out of 20 tested
Atari games (median +96%, maximum +3,756%) respectively without substantive
hyperparameter tuning. We also show that AG-SAC beats the model-free
state-of-the-art for sample efficiency in 17 out of the 20 tested Atari games
(median +62%, maximum +13,140%), again without substantive hyperparameter
tuning.
Description
[1910.02876] Reinforcement Learning with Structured Hierarchical Grammar Representations of Actions
%0 Journal Article
%1 christodoulou2019reinforcement
%A Christodoulou, Petros
%A Lange, Robert Tjarko
%A Shafti, Ali
%A Faisal, A. Aldo
%D 2019
%K optimization reinforcement-learning
%T Reinforcement Learning with Structured Hierarchical Grammar
Representations of Actions
%U http://arxiv.org/abs/1910.02876
%X From a young age humans learn to use grammatical principles to hierarchically
combine words into sentences. Action grammars is the parallel idea, that there
is an underlying set of rules (a "grammar") that govern how we hierarchically
combine actions to form new, more complex actions. We introduce the Action
Grammar Reinforcement Learning (AG-RL) framework which leverages the concept of
action grammars to consistently improve the sample efficiency of Reinforcement
Learning agents. AG-RL works by using a grammar inference algorithm to infer
the "action grammar" of an agent midway through training. The agent's action
space is then augmented with macro-actions identified by the grammar. We apply
this framework to Double Deep Q-Learning (AG-DDQN) and a discrete action
version of Soft Actor-Critic (AG-SAC) and find that it improves performance in
8 out of 8 tested Atari games (median +31%, max +668%) and 19 out of 20 tested
Atari games (median +96%, maximum +3,756%) respectively without substantive
hyperparameter tuning. We also show that AG-SAC beats the model-free
state-of-the-art for sample efficiency in 17 out of the 20 tested Atari games
(median +62%, maximum +13,140%), again without substantive hyperparameter
tuning.
@article{christodoulou2019reinforcement,
  abstract      = {From a young age humans learn to use grammatical principles to hierarchically
combine words into sentences. Action grammars is the parallel idea, that there
is an underlying set of rules (a ``grammar'') that govern how we hierarchically
combine actions to form new, more complex actions. We introduce the Action
Grammar Reinforcement Learning (AG-RL) framework which leverages the concept of
action grammars to consistently improve the sample efficiency of Reinforcement
Learning agents. AG-RL works by using a grammar inference algorithm to infer
the ``action grammar'' of an agent midway through training. The agent's action
space is then augmented with macro-actions identified by the grammar. We apply
this framework to Double Deep Q-Learning (AG-DDQN) and a discrete action
version of Soft Actor-Critic (AG-SAC) and find that it improves performance in
8 out of 8 tested Atari games (median +31%, max +668%) and 19 out of 20 tested
Atari games (median +96%, maximum +3,756%) respectively without substantive
hyperparameter tuning. We also show that AG-SAC beats the model-free
state-of-the-art for sample efficiency in 17 out of the 20 tested Atari games
(median +62%, maximum +13,140%), again without substantive hyperparameter
tuning.},
  added-at      = {2019-10-15T20:19:57.000+0200},
  archiveprefix = {arXiv},
  author        = {Christodoulou, Petros and Lange, Robert Tjarko and Shafti, Ali and Faisal, A. Aldo},
  biburl        = {https://www.bibsonomy.org/bibtex/27a9d6722868c5ca06a6b46a9c80b2dfd/kirk86},
  description   = {[1910.02876] Reinforcement Learning with Structured Hierarchical Grammar Representations of Actions},
  eprint        = {1910.02876},
  interhash     = {713687939ef50399a6a5e3cc50322f1b},
  intrahash     = {7a9d6722868c5ca06a6b46a9c80b2dfd},
  keywords      = {optimization reinforcement-learning},
  timestamp     = {2019-10-15T20:19:57.000+0200},
  title         = {Reinforcement Learning with Structured Hierarchical Grammar Representations of Actions},
  url           = {https://arxiv.org/abs/1910.02876},
  year          = {2019}
}