While gradient descent has proven highly successful in learning connection
weights for neural networks, the actual structure of these networks is usually
determined by hand, or by other optimization algorithms. Here we describe a
simple method to make network structure differentiable, and therefore
accessible to gradient descent. We test this method on recurrent neural
networks applied to simple sequence prediction problems. Starting with initial
networks containing only one node, the method automatically builds networks
that successfully solve the tasks. The number of nodes in the final network
correlates with task difficulty. The method can dynamically increase network
size in response to an abrupt complexification in the task; however, reduction
in network size in response to task simplification is not evident for
reasonable meta-parameters. The method does not penalize network performance
for these test tasks: variable-size networks actually reach better performance
than fixed-size networks of higher, lower or identical size. We conclude by
discussing how this method could be applied to more complex networks, such as
feedforward layered networks, or multiple-area networks of arbitrary shape.
%0 Generic
%1 miconi2016neural
%A Miconi, Thomas
%D 2016
%K learning network neural structure
%T Neural networks with differentiable structure
%U http://arxiv.org/abs/1606.06216
%X While gradient descent has proven highly successful in learning connection
weights for neural networks, the actual structure of these networks is usually
determined by hand, or by other optimization algorithms. Here we describe a
simple method to make network structure differentiable, and therefore
accessible to gradient descent. We test this method on recurrent neural
networks applied to simple sequence prediction problems. Starting with initial
networks containing only one node, the method automatically builds networks
that successfully solve the tasks. The number of nodes in the final network
correlates with task difficulty. The method can dynamically increase network
size in response to an abrupt complexification in the task; however, reduction
in network size in response to task simplification is not evident for
reasonable meta-parameters. The method does not penalize network performance
for these test tasks: variable-size networks actually reach better performance
than fixed-size networks of higher, lower or identical size. We conclude by
discussing how this method could be applied to more complex networks, such as
feedforward layered networks, or multiple-area networks of arbitrary shape.
@misc{miconi2016neural,
  abstract      = {While gradient descent has proven highly successful in learning connection
weights for neural networks, the actual structure of these networks is usually
determined by hand, or by other optimization algorithms. Here we describe a
simple method to make network structure differentiable, and therefore
accessible to gradient descent. We test this method on recurrent neural
networks applied to simple sequence prediction problems. Starting with initial
networks containing only one node, the method automatically builds networks
that successfully solve the tasks. The number of nodes in the final network
correlates with task difficulty. The method can dynamically increase network
size in response to an abrupt complexification in the task; however, reduction
in network size in response to task simplification is not evident for
reasonable meta-parameters. The method does not penalize network performance
for these test tasks: variable-size networks actually reach better performance
than fixed-size networks of higher, lower or identical size. We conclude by
discussing how this method could be applied to more complex networks, such as
feedforward layered networks, or multiple-area networks of arbitrary shape.},
  added-at      = {2017-02-06T17:55:47.000+0100},
  author        = {Miconi, Thomas},
  archiveprefix = {arXiv},
  eprint        = {1606.06216},
  biburl        = {https://www.bibsonomy.org/bibtex/270798d04639219533b0e56caff0e0a89/thoni},
  description   = {Neural networks with differentiable structure},
  interhash     = {ed3b85890376c652b5316a9352fe1cfd},
  intrahash     = {70798d04639219533b0e56caff0e0a89},
  keywords      = {learning network neural structure},
  note          = {cite arxiv:1606.06216},
  timestamp     = {2017-02-06T17:55:47.000+0100},
  title         = {Neural networks with differentiable structure},
  url           = {http://arxiv.org/abs/1606.06216},
  year          = {2016},
}