Due to their simplicity and excellent performance, parallel asynchronous
variants of stochastic gradient descent have become popular methods to solve a
wide range of large-scale optimization problems on multi-core architectures.
Yet, despite their practical success, support for nonsmooth objectives is still
lacking, making them unsuitable for many problems of interest in machine
learning, such as the Lasso, group Lasso or empirical risk minimization with
convex constraints. In this work, we propose and analyze ProxASAGA, a fully
asynchronous sparse method inspired by SAGA, a variance reduced incremental
gradient algorithm. The proposed method is easy to implement and significantly
outperforms the state of the art on several nonsmooth, large-scale problems. We
prove that our method achieves a theoretical linear speedup with respect to the
sequential version under assumptions on the sparsity of gradients and
block-separability of the proximal term. Empirical benchmarks on a multi-core
architecture illustrate practical speedups of up to 12x on a 20-core machine.
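
For intuition, below is a minimal, sequential sketch of a proximal SAGA update, the building block that ProxASAGA parallelizes asynchronously. This is an illustrative assumption, not the authors' parallel implementation; the names prox_saga, prox_l1 and grad_i are made up for the example.

import numpy as np

def prox_l1(v, threshold):
    # Soft-thresholding: proximal operator of threshold * ||.||_1 (Lasso penalty).
    return np.sign(v) * np.maximum(np.abs(v) - threshold, 0.0)

def prox_saga(grad_i, prox, x0, n_samples, step_size, n_iter=10000, rng=None):
    # grad_i(i, x): gradient of the i-th smooth loss term at x.
    # prox(v, step): proximal operator of the nonsmooth term, scaled by step.
    rng = np.random.default_rng() if rng is None else rng
    x = x0.copy()
    # SAGA table: last gradient seen for each sample, plus its running mean.
    memory = np.array([grad_i(i, x) for i in range(n_samples)])
    mean_grad = memory.mean(axis=0)
    for _ in range(n_iter):
        i = rng.integers(n_samples)
        g = grad_i(i, x)
        # Variance-reduced gradient estimate: unbiased, with variance that
        # shrinks as the iterates approach the optimum.
        v = g - memory[i] + mean_grad
        x = prox(x - step_size * v, step_size)
        # Refresh the gradient table and its mean with the new gradient.
        mean_grad += (g - memory[i]) / n_samples
        memory[i] = g
    return x

For a Lasso problem with data matrix A and targets b, one would pass grad_i = lambda i, x: A[i] * (A[i] @ x - b[i]) and prox = lambda v, step: prox_l1(v, lam * step) for a chosen regularization strength lam.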
@misc{pedregosa2017breaking,
archiveprefix = {arXiv},
author = {Pedregosa, Fabian and Leblond, R\'{e}mi and Lacoste-Julien, Simon},
day = 21,
eprint = {1707.06468},
keywords = {saga optimization},
month = jul,
title = {{Breaking the Nonsmooth Barrier: A Scalable Parallel Method for Composite Optimization}},
url = {http://arxiv.org/abs/1707.06468},
year = 2017
}