Variational Optimization forms a differentiable upper bound on an objective.
We show that approaches such as Natural Evolution Strategies and Gaussian
Perturbation are special cases of Variational Optimization in which the
expectations are approximated by Gaussian sampling. These approaches are of
particular interest because they are parallelizable. We calculate the
approximate bias and variance of the corresponding gradient estimators and
demonstrate that using antithetic sampling or a baseline is crucial to mitigate
their problems. We contrast these methods with an alternative parallelizable
method, namely Directional Derivatives. We conclude that, for differentiable
objectives, using Directional Derivatives is preferable to using Variational
Optimization to perform parallel Stochastic Gradient Descent.
%0 Journal Article
%1 bird2018stochastic
%A Bird, Thomas
%A Kunze, Julius
%A Barber, David
%D 2018
%K bayesian optimization readings uncertainty variational
%T Stochastic Variational Optimization
%U http://arxiv.org/abs/1809.04855
%X Variational Optimization forms a differentiable upper bound on an objective.
We show that approaches such as Natural Evolution Strategies and Gaussian
Perturbation are special cases of Variational Optimization in which the
expectations are approximated by Gaussian sampling. These approaches are of
particular interest because they are parallelizable. We calculate the
approximate bias and variance of the corresponding gradient estimators and
demonstrate that using antithetic sampling or a baseline is crucial to mitigate
their problems. We contrast these methods with an alternative parallelizable
method, namely Directional Derivatives. We conclude that, for differentiable
objectives, using Directional Derivatives is preferable to using Variational
Optimization to perform parallel Stochastic Gradient Descent.
@misc{bird2018stochastic,
  abstract      = {Variational Optimization forms a differentiable upper bound on an objective.
We show that approaches such as Natural Evolution Strategies and Gaussian
Perturbation are special cases of Variational Optimization in which the
expectations are approximated by Gaussian sampling. These approaches are of
particular interest because they are parallelizable. We calculate the
approximate bias and variance of the corresponding gradient estimators and
demonstrate that using antithetic sampling or a baseline is crucial to mitigate
their problems. We contrast these methods with an alternative parallelizable
method, namely Directional Derivatives. We conclude that, for differentiable
objectives, using Directional Derivatives is preferable to using Variational
Optimization to perform parallel Stochastic Gradient Descent.},
  added-at      = {2019-12-22T18:07:45.000+0100},
  archiveprefix = {arXiv},
  author        = {Bird, Thomas and Kunze, Julius and Barber, David},
  biburl        = {https://www.bibsonomy.org/bibtex/2ec4b400553c65c0f8d5152d64759f67d/kirk86},
  description   = {[1809.04855] Stochastic Variational Optimization},
  eprint        = {1809.04855},
  interhash     = {c490d45eca8b52f59ac77599f781892d},
  intrahash     = {ec4b400553c65c0f8d5152d64759f67d},
  keywords      = {bayesian optimization readings uncertainty variational},
  timestamp     = {2019-12-22T18:07:45.000+0100},
  title         = {Stochastic Variational Optimization},
  url           = {http://arxiv.org/abs/1809.04855},
  year          = {2018},
}