In this work, we develop a new approximation method for the analytically
intractable Bayesian inference in Gaussian process models with factorizable
non-Gaussian likelihoods and single-output latent functions. Our method, dubbed
QP, is similar to expectation propagation (EP), but it minimizes the $L^2$
Wasserstein distance instead of the Kullback-Leibler (KL) divergence. We
consider the specific case in which the non-Gaussian likelihood is approximated
by a Gaussian likelihood. We show that QP has the following properties: (1)
QP matches quantile functions rather than the moments matched by EP; (2) QP and
EP yield the same local update for the mean of the approximate Gaussian
likelihood; (3) the local variance estimate for the approximate likelihood is
smaller for QP than for EP, mitigating EP's overestimation of the variance;
(4) the optimal approximate Gaussian likelihood admits a univariate
parameterization, reducing memory consumption and computation time.
Furthermore, we provide a unified interpretation of EP and QP: under the same
assumptions, both are coordinate descent algorithms on a global objective
function, a KL divergence for EP and an $L^2$ Wasserstein distance for QP. In
experiments on eight real-world datasets, we show that QP outperforms EP on
the task of Gaussian process binary classification.
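
For intuition behind properties (1)-(3), recall the standard one-dimensional
identity (not specific to this paper) that the squared $L^2$ Wasserstein
distance between distributions $p$ and $q$ equals the squared $L^2$ distance
between their quantile functions,
\[ W_2^2(p, q) = \int_0^1 \big( F_p^{-1}(u) - F_q^{-1}(u) \big)^2 \, du, \]
which for two univariate Gaussians reduces to the closed form
\[ W_2^2\big(\mathcal{N}(m_1, s_1^2), \mathcal{N}(m_2, s_2^2)\big) = (m_1 - m_2)^2 + (s_1 - s_2)^2. \]
Minimizing the first expression over the Gaussian family
$q = \mathcal{N}(m, s^2)$, for which $F_q^{-1}(u) = m + s\,\Phi^{-1}(u)$,
gives $m^* = \mathbb{E}_p[x]$ and
$s^* = \int_0^1 F_p^{-1}(u)\,\Phi^{-1}(u)\,du$, and by the Cauchy-Schwarz
inequality $s^* \le \mathrm{Std}_p[x]$, with equality only when $p$ is itself
Gaussian. This is consistent with properties (2) and (3) above.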
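As a quick numerical check of the mean and variance behaviour (a minimal
sketch under our own assumptions, not the authors' code), consider the
probit-tilted standard normal $p(x) \propto \Phi(x)\,\mathcal{N}(x; 0, 1)$,
the kind of tilted distribution that appears in a local update for GP binary
classification, and compare the moment-matched Gaussian (EP-style) with the
$W_2$-optimal Gaussian computed from the quantile function (QP-style):

    import numpy as np
    from scipy.integrate import quad
    from scipy.stats import norm

    # Tilted distribution p(x) proportional to Phi(x) N(x; 0, 1), the form
    # arising in a local update for probit GP classification (illustrative).
    Z = quad(lambda x: norm.cdf(x) * norm.pdf(x), -10, 10)[0]   # = 1/2
    p = lambda x: norm.cdf(x) * norm.pdf(x) / Z

    # EP-style moment matching: mean and standard deviation of p.
    m_ep = quad(lambda x: x * p(x), -10, 10)[0]
    s_ep = np.sqrt(quad(lambda x: (x - m_ep) ** 2 * p(x), -10, 10)[0])

    # QP-style quantile matching: the W2-optimal Gaussian N(m, s) has
    # m = E_p[x] and s = integral_0^1 F_p^{-1}(u) Phi^{-1}(u) du.
    xs = np.linspace(-8.0, 8.0, 20001)
    cdf = np.cumsum(p(xs)) * (xs[1] - xs[0])
    cdf /= cdf[-1]                               # numerical CDF of p
    us = np.linspace(1e-6, 1.0 - 1e-6, 20001)
    q_p = np.interp(us, cdf, xs)                 # quantile function F_p^{-1}
    m_qp = np.mean(q_p)                          # approximates E_p[x]
    s_qp = np.mean(q_p * norm.ppf(us))           # int Phi^{-1}(u)^2 du = 1

    print(f"EP: m = {m_ep:.4f}, s = {s_ep:.4f}")
    print(f"QP: m = {m_qp:.4f}, s = {s_qp:.4f}") # same mean, smaller s

Up to quadrature error, the two means agree while the QP standard deviation
comes out strictly smaller, matching properties (2) and (3); the gap is small
in this example because the probit-tilted normal is close to Gaussian.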
@article{zhang2019quantile,
author = {Zhang, Rui and Walder, Christian J. and Bonilla, Edwin V. and Rizoiu, Marian-Andrei and Xie, Lexing},
keywords = {approximate bayesian gaussian-processes readings uncertainty},
note = {cite arxiv:1912.10200},
title = {Quantile Propagation for Wasserstein-Approximate Gaussian Processes},
url = {http://arxiv.org/abs/1912.10200},
year = 2019
}