Mode connectivity is a surprising phenomenon in the loss landscape of deep
nets. Optima---at least those discovered by gradient-based optimization---turn
out to be connected by simple paths on which the loss function is almost
constant. Often, these paths can be chosen to be piece-wise linear, with as few
as two segments.
We give mathematical explanations for this phenomenon, assuming generic
properties (such as dropout stability and noise stability) of well-trained deep
nets, which have previously been identified as part of understanding the
generalization properties of deep nets. Our explanation holds for realistic
multilayer nets, and experiments are presented to verify the theory.
Description
[1906.06247] Explaining Landscape Connectivity of Low-cost Solutions for Multilayer Nets
%0 Journal Article
%1 kuditipudi2019explaining
%A Kuditipudi, Rohith
%A Wang, Xiang
%A Lee, Holden
%A Zhang, Yi
%A Li, Zhiyuan
%A Hu, Wei
%A Arora, Sanjeev
%A Ge, Rong
%D 2019
%K deep-learning foundations machine-learning optimization readings theory
%T Explaining Landscape Connectivity of Low-cost Solutions for Multilayer
Nets
%U http://arxiv.org/abs/1906.06247
%X Mode connectivity is a surprising phenomenon in the loss landscape of deep
nets. Optima---at least those discovered by gradient-based optimization---turn
out to be connected by simple paths on which the loss function is almost
constant. Often, these paths can be chosen to be piece-wise linear, with as few
as two segments.
We give mathematical explanations for this phenomenon, assuming generic
properties (such as dropout stability and noise stability) of well-trained deep
nets, which have previously been identified as part of understanding the
generalization properties of deep nets. Our explanation holds for realistic
multilayer nets, and experiments are presented to verify the theory.
@article{kuditipudi2019explaining,
  abstract      = {Mode connectivity is a surprising phenomenon in the loss landscape of deep
nets. Optima---at least those discovered by gradient-based optimization---turn
out to be connected by simple paths on which the loss function is almost
constant. Often, these paths can be chosen to be piece-wise linear, with as few
as two segments.
We give mathematical explanations for this phenomenon, assuming generic
properties (such as dropout stability and noise stability) of well-trained deep
nets, which have previously been identified as part of understanding the
generalization properties of deep nets. Our explanation holds for realistic
multilayer nets, and experiments are presented to verify the theory.},
  added-at      = {2019-06-18T00:53:23.000+0200},
  archiveprefix = {arXiv},
  author        = {Kuditipudi, Rohith and Wang, Xiang and Lee, Holden and Zhang, Yi and Li, Zhiyuan and Hu, Wei and Arora, Sanjeev and Ge, Rong},
  biburl        = {https://www.bibsonomy.org/bibtex/291695a873efa27de9850e9edada1d4a5/kirk86},
  description   = {[1906.06247] Explaining Landscape Connectivity of Low-cost Solutions for Multilayer Nets},
  eprint        = {1906.06247},
  interhash     = {fabfe5b301463aee11aca4910cd377df},
  intrahash     = {91695a873efa27de9850e9edada1d4a5},
  keywords      = {deep-learning foundations machine-learning optimization readings theory},
  note          = {cite arxiv:1906.06247},
  timestamp     = {2019-09-27T21:45:33.000+0200},
  title         = {Explaining Landscape Connectivity of Low-cost Solutions for Multilayer Nets},
  url           = {http://arxiv.org/abs/1906.06247},
  year          = 2019
}