We study the problem of estimating the covariance matrix of a
high-dimensional distribution when a small constant fraction of the samples can
be arbitrarily corrupted. Recent work gave the first polynomial time algorithms
for this problem with near-optimal error guarantees for several natural
structured distributions. Our main contribution is to develop faster algorithms
for this problem whose running time nearly matches that of computing the
empirical covariance.
Given $N = \tilde{\Omega}(d^2/\epsilon^2)$ samples from a $d$-dimensional
Gaussian distribution, an $\epsilon$-fraction of which may be arbitrarily
corrupted, our algorithm runs in time
$\tilde{O}(d^{3.26})/\mathrm{poly}(\epsilon)$ and approximates the unknown
covariance matrix to optimal error up to a logarithmic factor. Previous robust
algorithms with comparable error guarantees all have runtimes
$\tilde{\Omega}(d^{2 \omega})$ when $\epsilon = \Omega(1)$, where $\omega$ is
the exponent of matrix multiplication. We also provide evidence that improving
the running time of our algorithm may require new algorithmic techniques.
Description
[1906.04661] Faster Algorithms for High-Dimensional Robust Covariance Estimation
%0 Journal Article
%1 cheng2019faster
%A Cheng, Yu
%A Diakonikolas, Ilias
%A Ge, Rong
%A Woodruff, David
%D 2019
%K robustness stats
%T Faster Algorithms for High-Dimensional Robust Covariance Estimation
%U http://arxiv.org/abs/1906.04661
%X We study the problem of estimating the covariance matrix of a
high-dimensional distribution when a small constant fraction of the samples can
be arbitrarily corrupted. Recent work gave the first polynomial time algorithms
for this problem with near-optimal error guarantees for several natural
structured distributions. Our main contribution is to develop faster algorithms
for this problem whose running time nearly matches that of computing the
empirical covariance.
Given $N = \tilde{\Omega}(d^2/\epsilon^2)$ samples from a $d$-dimensional
Gaussian distribution, an $\epsilon$-fraction of which may be arbitrarily
corrupted, our algorithm runs in time
$\tilde{O}(d^{3.26})/\mathrm{poly}(\epsilon)$ and approximates the unknown
covariance matrix to optimal error up to a logarithmic factor. Previous robust
algorithms with comparable error guarantees all have runtimes
$\tilde{\Omega}(d^{2 \omega})$ when $\epsilon = \Omega(1)$, where $\omega$ is
the exponent of matrix multiplication. We also provide evidence that improving
the running time of our algorithm may require new algorithmic techniques.
% arXiv preprint: typed as @misc (no journal), with the arXiv identifier stored in eprint/archiveprefix.
@misc{cheng2019faster,
  author        = {Cheng, Yu and Diakonikolas, Ilias and Ge, Rong and Woodruff, David},
  title         = {Faster Algorithms for High-Dimensional Robust Covariance Estimation},
  year          = {2019},
  eprint        = {1906.04661},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1906.04661},
  abstract      = {We study the problem of estimating the covariance matrix of a
high-dimensional distribution when a small constant fraction of the samples can
be arbitrarily corrupted. Recent work gave the first polynomial time algorithms
for this problem with near-optimal error guarantees for several natural
structured distributions. Our main contribution is to develop faster algorithms
for this problem whose running time nearly matches that of computing the
empirical covariance.
Given $N = \tilde{\Omega}(d^2/\epsilon^2)$ samples from a $d$-dimensional
Gaussian distribution, an $\epsilon$-fraction of which may be arbitrarily
corrupted, our algorithm runs in time
$\tilde{O}(d^{3.26})/\mathrm{poly}(\epsilon)$ and approximates the unknown
covariance matrix to optimal error up to a logarithmic factor. Previous robust
algorithms with comparable error guarantees all have runtimes
$\tilde{\Omega}(d^{2 \omega})$ when $\epsilon = \Omega(1)$, where $\omega$ is
the exponent of matrix multiplication. We also provide evidence that improving
the running time of our algorithm may require new algorithmic techniques.},
  keywords      = {robustness stats},
  description   = {[1906.04661] Faster Algorithms for High-Dimensional Robust Covariance Estimation},
  added-at      = {2020-02-26T13:37:43.000+0100},
  timestamp     = {2020-02-26T13:37:43.000+0100},
  biburl        = {https://www.bibsonomy.org/bibtex/22783c9f0d4f93335305d01ed6ca7ef6b/kirk86},
  interhash     = {0d946d3aa98e314941f2a9d9996b30f1},
  intrahash     = {2783c9f0d4f93335305d01ed6ca7ef6b},
}