Most gradient-based approaches to meta-learning do not explicitly account for
the fact that different parts of the underlying model adapt by different
amounts when applied to a new task. For example, the input layers of an image
classification convnet typically adapt very little, while the output layers can
change significantly. This can cause parts of the model to begin to overfit
while others underfit. To address this, we introduce a hierarchical Bayesian
model with per-module shrinkage parameters, which we propose to learn by
maximizing an approximation of the predictive likelihood using implicit
differentiation. Our algorithm subsumes Reptile and outperforms variants of
MAML on two synthetic few-shot meta-learning problems.
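
For intuition, the per-module shrinkage idea can be written as a simple hierarchical prior (a sketch under assumed notation, not necessarily the paper's exact parameterization): task $t$'s parameters for module $m$ are drawn as

    \theta_{t,m} \sim \mathcal{N}(\phi_m,\, \sigma_m^2 I),

where $\phi_m$ are shared meta-parameters and $\sigma_m^2$ is a learned per-module shrinkage scale. A small $\sigma_m^2$ effectively freezes module $m$ at its meta-learned value across tasks (e.g., the convnet input layers above), while a large $\sigma_m^2$ lets it adapt freely (e.g., the output layers); learning each $\sigma_m^2$ by maximizing the approximate predictive likelihood is what lets the method decide which modules should adapt and which should not.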
@article{chen2019modular,
author = {Chen, Yutian and Friesen, Abram L. and Behbahani, Feryal and Budden, David and Hoffman, Matthew W. and Doucet, Arnaud and de Freitas, Nando},
keywords = {bayesian graphs hierarchical markov-processes meta-learning optimization stats theory},
note = {arXiv:1909.05557. Comment: 14 pages (4 main, 8 supplement), under review},
title = {Modular Meta-Learning with Shrinkage},
url = {http://arxiv.org/abs/1909.05557},
year = 2019
}