We lower bound the complexity of finding $\epsilon$-stationary points (with
gradient norm at most $\epsilon$) using stochastic first-order methods. In a
well-studied model where algorithms access smooth, potentially non-convex
functions through queries to an unbiased stochastic gradient oracle with
bounded variance, we prove that (in the worst case) any algorithm requires at
least $\epsilon^{-4}$ queries to find an $\epsilon$-stationary point. The lower
bound is tight and establishes that stochastic gradient descent is minimax
optimal in this model. In a more restrictive model where the noisy gradient
estimates satisfy a mean-squared smoothness property, we prove a lower bound of
$\epsilon^{-3}$ queries, establishing the optimality of recently proposed
variance reduction techniques.
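As a brief formal sketch of the setting described above (the symbols $\sigma^2$ and $L$ below are standard in this literature but are assumed here, not defined in this record): a point $x$ is $\epsilon$-stationary for a smooth function $f$ when $\|\nabla f(x)\| \le \epsilon$, and the oracle model posits a stochastic gradient estimator $g(x, z)$ satisfying

\[ \mathbb{E}_z[g(x,z)] = \nabla f(x), \qquad \mathbb{E}_z\|g(x,z) - \nabla f(x)\|^2 \le \sigma^2 . \]

The more restrictive mean-squared smoothness condition additionally requires

\[ \mathbb{E}_z\|g(x,z) - g(y,z)\|^2 \le L^2\|x - y\|^2 \quad \text{for all } x, y, \]

under which recently proposed variance reduction techniques attain the matching $\epsilon^{-3}$ upper bound.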
@article{arjevani2019lower,
abstract = {We lower bound the complexity of finding $\epsilon$-stationary points (with
gradient norm at most $\epsilon$) using stochastic first-order methods. In a
well-studied model where algorithms access smooth, potentially non-convex
functions through queries to an unbiased stochastic gradient oracle with
bounded variance, we prove that (in the worst case) any algorithm requires at
least $\epsilon^{-4}$ queries to find an $\epsilon$-stationary point. The lower
bound is tight and establishes that stochastic gradient descent is minimax
optimal in this model. In a more restrictive model where the noisy gradient
estimates satisfy a mean-squared smoothness property, we prove a lower bound of
$\epsilon^{-3}$ queries, establishing the optimality of recently proposed
variance reduction techniques.},
author = {Arjevani, Yossi and Carmon, Yair and Duchi, John C. and Foster, Dylan J. and Srebro, Nathan and Woodworth, Blake},
biburl = {https://www.bibsonomy.org/bibtex/2e3b68781ac6299fbd31cb524330b5b53/kirk86},
keywords = {bounds generalization optimization readings},
note = {cite arxiv:1912.02365},
title = {Lower Bounds for Non-Convex Stochastic Optimization},
url = {http://arxiv.org/abs/1912.02365},
year = 2019
}