While the universal approximation property holds both for hierarchical and
shallow networks, we prove that deep (hierarchical) networks can approximate
the class of compositional functions with the same accuracy as shallow networks
but with exponentially lower number of training parameters as well as
VC-dimension. This theorem settles an old conjecture by Bengio on the role of
depth in networks. We then define a general class of scalable, shift-invariant
algorithms to show a simple and natural set of requirements that justify deep
convolutional networks.
Description
[1603.00988] Learning Functions: When Is Deep Better Than Shallow
%0 Journal Article
%1 mhaskar2016learning
%A Mhaskar, Hrushikesh
%A Liao, Qianli
%A Poggio, Tomaso
%D 2016
%K approximate complexity generalization readings
%T Learning Functions: When Is Deep Better Than Shallow
%U http://arxiv.org/abs/1603.00988
%X While the universal approximation property holds both for hierarchical and
shallow networks, we prove that deep (hierarchical) networks can approximate
the class of compositional functions with the same accuracy as shallow networks
but with exponentially lower number of training parameters as well as
VC-dimension. This theorem settles an old conjecture by Bengio on the role of
depth in networks. We then define a general class of scalable, shift-invariant
algorithms to show a simple and natural set of requirements that justify deep
convolutional networks.
@misc{mhaskar2016learning,
  abstract      = {While the universal approximation property holds both for hierarchical and
                   shallow networks, we prove that deep (hierarchical) networks can approximate
                   the class of compositional functions with the same accuracy as shallow networks
                   but with exponentially lower number of training parameters as well as
                   VC-dimension. This theorem settles an old conjecture by Bengio on the role of
                   depth in networks. We then define a general class of scalable, shift-invariant
                   algorithms to show a simple and natural set of requirements that justify deep
                   convolutional networks.},
  added-at      = {2019-11-01T15:35:45.000+0100},
  archiveprefix = {arXiv},
  author        = {Mhaskar, Hrushikesh and Liao, Qianli and Poggio, Tomaso},
  biburl        = {https://www.bibsonomy.org/bibtex/2454c3c9f4b8849e27744cebe3c94c973/kirk86},
  description   = {[1603.00988] Learning Functions: When Is Deep Better Than Shallow},
  eprint        = {1603.00988},
  interhash     = {9fbae0ef1c92235e8874ab712097d0e3},
  intrahash     = {454c3c9f4b8849e27744cebe3c94c973},
  keywords      = {approximate complexity generalization readings},
  note          = {arXiv preprint arXiv:1603.00988},
  timestamp     = {2019-11-01T15:35:45.000+0100},
  title         = {Learning Functions: When Is Deep Better Than Shallow},
  url           = {http://arxiv.org/abs/1603.00988},
  year          = {2016},
}