A family of super deep networks, referred to as residual networks or ResNet,
achieved record-beating performance in various visual tasks such as image
recognition, object detection, and semantic segmentation. The ability to train
very deep networks naturally pushed the researchers to use enormous resources
to achieve the best performance. Consequently, in many applications super deep
residual networks were employed for just a marginal improvement in performance.
In this paper, we propose epsilon-ResNet that allows us to automatically
discard redundant layers, which produces responses that are smaller than a
threshold epsilon, with a marginal or no loss in performance. The
epsilon-ResNet architecture can be achieved using a few additional rectified
linear units in the original ResNet. Our method does not use any additional
variables nor numerous trials like other hyper-parameter optimization
techniques. The layer selection is achieved using a single training process and
the evaluation is performed on CIFAR-10, CIFAR-100, SVHN, and ImageNet
datasets. In some instances, we achieve about 80% reduction in the number of
parameters.
Beschreibung
Learning Strict Identity Mappings in Deep Residual Networks
%0 Generic
%1 yu2018learning
%A Yu, Xin
%A Yu, Zhiding
%A Ramalingam, Srikumar
%D 2018
%K seminar
%T Learning Strict Identity Mappings in Deep Residual Networks
%U http://arxiv.org/abs/1804.01661
%X A family of super deep networks, referred to as residual networks or ResNet,
achieved record-beating performance in various visual tasks such as image
recognition, object detection, and semantic segmentation. The ability to train
very deep networks naturally pushed the researchers to use enormous resources
to achieve the best performance. Consequently, in many applications super deep
residual networks were employed for just a marginal improvement in performance.
In this paper, we propose epsilon-ResNet that allows us to automatically
discard redundant layers, which produces responses that are smaller than a
threshold epsilon, with a marginal or no loss in performance. The
epsilon-ResNet architecture can be achieved using a few additional rectified
linear units in the original ResNet. Our method does not use any additional
variables nor numerous trials like other hyper-parameter optimization
techniques. The layer selection is achieved using a single training process and
the evaluation is performed on CIFAR-10, CIFAR-100, SVHN, and ImageNet
datasets. In some instances, we achieve about 80% reduction in the number of
parameters.
@misc{yu2018learning,
  abstract      = {A family of super deep networks, referred to as residual networks or ResNet,
achieved record-beating performance in various visual tasks such as image
recognition, object detection, and semantic segmentation. The ability to train
very deep networks naturally pushed the researchers to use enormous resources
to achieve the best performance. Consequently, in many applications super deep
residual networks were employed for just a marginal improvement in performance.
In this paper, we propose epsilon-ResNet that allows us to automatically
discard redundant layers, which produces responses that are smaller than a
threshold epsilon, with a marginal or no loss in performance. The
epsilon-ResNet architecture can be achieved using a few additional rectified
linear units in the original ResNet. Our method does not use any additional
variables nor numerous trials like other hyper-parameter optimization
techniques. The layer selection is achieved using a single training process and
the evaluation is performed on CIFAR-10, CIFAR-100, SVHN, and ImageNet
datasets. In some instances, we achieve about 80\% reduction in the number of
parameters.},
  added-at      = {2018-04-10T19:20:16.000+0200},
  archiveprefix = {arXiv},
  author        = {Yu, Xin and Yu, Zhiding and Ramalingam, Srikumar},
  biburl        = {https://www.bibsonomy.org/bibtex/2c0aeec6a26d6cd8e7f99dec47eb5208b/jk_itwm},
  description   = {Learning Strict Identity Mappings in Deep Residual Networks},
  eprint        = {1804.01661},
  interhash     = {87fbc55d1ba5aee405819a26cab5ee6a},
  intrahash     = {c0aeec6a26d6cd8e7f99dec47eb5208b},
  keywords      = {seminar},
  note          = {cite arxiv:1804.01661},
  timestamp     = {2018-04-10T19:20:16.000+0200},
  title         = {Learning Strict Identity Mappings in Deep Residual Networks},
  url           = {http://arxiv.org/abs/1804.01661},
  year          = {2018},
}