We study the fundamental problem of high-dimensional mean estimation in a
robust model where a constant fraction of the samples are adversarially
corrupted. Recent work gave the first polynomial time algorithms for this
problem with dimension-independent error guarantees for several families of
structured distributions.
In this work, we give the first nearly-linear time algorithms for
high-dimensional robust mean estimation. Specifically, we focus on
distributions with (i) known covariance and sub-gaussian tails, and (ii)
unknown bounded covariance. Given $N$ samples on $R^d$, an
$\epsilon$-fraction of which may be arbitrarily corrupted, our algorithms run
in time $O(Nd) / poly(\epsilon)$ and approximate the true mean
within the information-theoretically optimal error, up to constant factors.
Previous robust algorithms with comparable error guarantees have running times
$Ømega(N d^2)$, for $= Ømega(1)$.
Our algorithms rely on a natural family of SDPs parameterized by our current
guess $\nu$ for the unknown mean $\mu^\star$. We give a win-win analysis
establishing the following: either a near-optimal solution to the primal SDP
yields a good candidate for $\mu^\star$ -- independent of our current guess
$\nu$ -- or the dual SDP yields a new guess $\nu'$ whose distance from
$\mu^\star$ is smaller by a constant factor. We exploit the special structure
of the corresponding SDPs to show that they are approximately solvable in
nearly-linear time. Our approach is quite general, and we believe it can also
be applied to obtain nearly-linear time algorithms for other high-dimensional
robust learning problems.
Beschreibung
[1811.09380] High-Dimensional Robust Mean Estimation in Nearly-Linear Time
%0 Journal Article
%1 cheng2018highdimensional
%A Cheng, Yu
%A Diakonikolas, Ilias
%A Ge, Rong
%D 2018
%K robustness stats
%T High-Dimensional Robust Mean Estimation in Nearly-Linear Time
%U http://arxiv.org/abs/1811.09380
%X We study the fundamental problem of high-dimensional mean estimation in a
robust model where a constant fraction of the samples are adversarially
corrupted. Recent work gave the first polynomial time algorithms for this
problem with dimension-independent error guarantees for several families of
structured distributions.
In this work, we give the first nearly-linear time algorithms for
high-dimensional robust mean estimation. Specifically, we focus on
distributions with (i) known covariance and sub-gaussian tails, and (ii)
unknown bounded covariance. Given $N$ samples on $R^d$, an
$\epsilon$-fraction of which may be arbitrarily corrupted, our algorithms run
in time $O(Nd) / poly(\epsilon)$ and approximate the true mean
within the information-theoretically optimal error, up to constant factors.
Previous robust algorithms with comparable error guarantees have running times
$Ømega(N d^2)$, for $= Ømega(1)$.
Our algorithms rely on a natural family of SDPs parameterized by our current
guess $\nu$ for the unknown mean $\mu^\star$. We give a win-win analysis
establishing the following: either a near-optimal solution to the primal SDP
yields a good candidate for $\mu^\star$ -- independent of our current guess
$\nu$ -- or the dual SDP yields a new guess $\nu'$ whose distance from
$\mu^\star$ is smaller by a constant factor. We exploit the special structure
of the corresponding SDPs to show that they are approximately solvable in
nearly-linear time. Our approach is quite general, and we believe it can also
be applied to obtain nearly-linear time algorithms for other high-dimensional
robust learning problems.
@article{cheng2018highdimensional,
abstract = {We study the fundamental problem of high-dimensional mean estimation in a
robust model where a constant fraction of the samples are adversarially
corrupted. Recent work gave the first polynomial time algorithms for this
problem with dimension-independent error guarantees for several families of
structured distributions.
In this work, we give the first nearly-linear time algorithms for
high-dimensional robust mean estimation. Specifically, we focus on
distributions with (i) known covariance and sub-gaussian tails, and (ii)
unknown bounded covariance. Given $N$ samples on $\mathbb{R}^d$, an
$\epsilon$-fraction of which may be arbitrarily corrupted, our algorithms run
in time $\tilde{O}(Nd) / \mathrm{poly}(\epsilon)$ and approximate the true mean
within the information-theoretically optimal error, up to constant factors.
Previous robust algorithms with comparable error guarantees have running times
$\tilde{\Omega}(N d^2)$, for $\epsilon = \Omega(1)$.
Our algorithms rely on a natural family of SDPs parameterized by our current
guess $\nu$ for the unknown mean $\mu^\star$. We give a win-win analysis
establishing the following: either a near-optimal solution to the primal SDP
yields a good candidate for $\mu^\star$ -- independent of our current guess
$\nu$ -- or the dual SDP yields a new guess $\nu'$ whose distance from
$\mu^\star$ is smaller by a constant factor. We exploit the special structure
of the corresponding SDPs to show that they are approximately solvable in
nearly-linear time. Our approach is quite general, and we believe it can also
be applied to obtain nearly-linear time algorithms for other high-dimensional
robust learning problems.},
added-at = {2020-02-26T13:38:33.000+0100},
author = {Cheng, Yu and Diakonikolas, Ilias and Ge, Rong},
biburl = {https://www.bibsonomy.org/bibtex/2ea9c43b51a2409e0d5ab230cc1506627/kirk86},
description = {[1811.09380] High-Dimensional Robust Mean Estimation in Nearly-Linear Time},
interhash = {4064109f1f942ca3b81fde21b9e1ba24},
intrahash = {ea9c43b51a2409e0d5ab230cc1506627},
keywords = {robustness stats},
note = {cite arxiv:1811.09380},
timestamp = {2020-02-26T13:38:33.000+0100},
title = {High-Dimensional Robust Mean Estimation in Nearly-Linear Time},
url = {http://arxiv.org/abs/1811.09380},
year = 2018
}