The computational and storage complexity of kernel machines presents the
primary barrier to their scaling to large, modern, datasets. A common way to
tackle the scalability issue is to use the conjugate gradient algorithm, which
relieves the constraints on both storage (the kernel matrix need not be stored)
and computation (both stochastic gradients and parallelization can be used).
Even so, conjugate gradient is not without its own issues: the conditioning of
kernel matrices is often such that conjugate gradients will have poor
convergence in practice. Preconditioning is a common approach to alleviating
this issue. Here we propose preconditioned conjugate gradients for kernel
machines, and develop a broad range of preconditioners particularly useful for
kernel matrices. We describe a scalable approach to both solving kernel
machines and learning their hyperparameters. We show this approach is exact in
the limit of iterations and outperforms state-of-the-art approximations for a
given computational budget.
%0 Generic
%1 cutajar2016preconditioning
%A Cutajar, Kurt
%A Osborne, Michael A.
%A Cunningham, John P.
%A Filippone, Maurizio
%D 2016
%K Gaussian_processes linear_algebra preconditioning
%T Preconditioning Kernel Matrices
%U http://arxiv.org/abs/1602.06693
%X The computational and storage complexity of kernel machines presents the
primary barrier to their scaling to large, modern, datasets. A common way to
tackle the scalability issue is to use the conjugate gradient algorithm, which
relieves the constraints on both storage (the kernel matrix need not be stored)
and computation (both stochastic gradients and parallelization can be used).
Even so, conjugate gradient is not without its own issues: the conditioning of
kernel matrices is often such that conjugate gradients will have poor
convergence in practice. Preconditioning is a common approach to alleviating
this issue. Here we propose preconditioned conjugate gradients for kernel
machines, and develop a broad range of preconditioners particularly useful for
kernel matrices. We describe a scalable approach to both solving kernel
machines and learning their hyperparameters. We show this approach is exact in
the limit of iterations and outperforms state-of-the-art approximations for a
given computational budget.
@misc{cutajar2016preconditioning,
  abstract      = {The computational and storage complexity of kernel machines presents the
primary barrier to their scaling to large, modern, datasets. A common way to
tackle the scalability issue is to use the conjugate gradient algorithm, which
relieves the constraints on both storage (the kernel matrix need not be stored)
and computation (both stochastic gradients and parallelization can be used).
Even so, conjugate gradient is not without its own issues: the conditioning of
kernel matrices is often such that conjugate gradients will have poor
convergence in practice. Preconditioning is a common approach to alleviating
this issue. Here we propose preconditioned conjugate gradients for kernel
machines, and develop a broad range of preconditioners particularly useful for
kernel matrices. We describe a scalable approach to both solving kernel
machines and learning their hyperparameters. We show this approach is exact in
the limit of iterations and outperforms state-of-the-art approximations for a
given computational budget.},
  added-at      = {2021-02-10T23:10:53.000+0100},
  archiveprefix = {arXiv},
  author        = {Cutajar, Kurt and Osborne, Michael A. and Cunningham, John P. and Filippone, Maurizio},
  biburl        = {https://www.bibsonomy.org/bibtex/2aeb4b33372b05953b7e7d91b5aeef524/peter.ralph},
  eprint        = {1602.06693},
  interhash     = {0a7dc5633a77d48def64b785f172a5e9},
  intrahash     = {aeb4b33372b05953b7e7d91b5aeef524},
  keywords      = {Gaussian_processes linear_algebra preconditioning},
  timestamp     = {2021-02-10T23:10:53.000+0100},
  title         = {Preconditioning Kernel Matrices},
  url           = {http://arxiv.org/abs/1602.06693},
  year          = {2016},
}