Non-linear functions such as neural networks can be locally approximated by
affine planes. Recent works make use of input-Jacobians, which describe the
normals to these planes. In this paper, we introduce full-Jacobians, which
include this normal along with an additional intercept term called the
bias-Jacobians; together these completely describe the local planes. For ReLU
neural networks, bias-Jacobians correspond to sums of gradients of the outputs
w.r.t. intermediate layer activations.
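
One concrete reading of this decomposition (a sketch in notation chosen here, not quoted from the paper): for a ReLU network whose activation pattern is locally constant around the input x, the output splits exactly into an input-Jacobian term and a bias term,

\[
  f(x) \;=\; \nabla_x f(x)^{\top} x \;+\; \sum_{l} \nabla_{b_l} f(x)^{\top} b_l ,
\]

where b_l is the bias of layer l and \nabla_{b_l} f(x) coincides with the gradient of the output w.r.t. layer l's pre-activation. The second term is the intercept that the bias-Jacobians are said to capture.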
We first use these full-Jacobians for distillation, aligning the gradients of
intermediate representations between teacher and student networks. Next, we
regularize bias-Jacobians alone to improve generalization. Finally, we show
that full-Jacobian maps can be viewed as saliency maps. Experimental results
show improved distillation on small datasets, improved generalization for
neural network training, and sharper saliency maps.
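
The decomposition above can be checked numerically with automatic differentiation. The sketch below is hypothetical code, not the authors' implementation; it assumes PyTorch and a toy scalar-output ReLU MLP, computes the input-Jacobian and the per-layer bias gradients, and reconstructs the network output from the resulting local affine description.

# Minimal sketch: full-Jacobian decomposition of a toy ReLU MLP (PyTorch assumed).
import torch
import torch.nn as nn

torch.manual_seed(0)

# Small ReLU network with a bias in every linear layer and a scalar output.
net = nn.Sequential(
    nn.Linear(4, 8), nn.ReLU(),
    nn.Linear(8, 8), nn.ReLU(),
    nn.Linear(8, 1),
)

x = torch.randn(4, requires_grad=True)
out = net(x).squeeze()

# Gradients of the output w.r.t. the input (input-Jacobian) and w.r.t. every
# bias vector (the bias-Jacobian components).
biases = [m.bias for m in net if isinstance(m, nn.Linear)]
grads = torch.autograd.grad(out, [x] + biases)
input_jacobian, bias_jacobians = grads[0], grads[1:]

# Reconstruct the output from the local plane: the input-Jacobian is its normal,
# and the bias-Jacobian terms supply the intercept.
reconstruction = input_jacobian @ x + sum(
    (g * b).sum() for g, b in zip(bias_jacobians, biases)
)
print(out.item(), reconstruction.item())  # equal up to floating-point error

A penalty on the bias-gradient terms computed this way would be one plausible form of the bias-Jacobian regularizer the abstract mentions, though the exact objective used in the paper is not reproduced here.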
@misc{srinivas2019fulljacobian,
added-at = {2019-05-20T08:14:26.000+0200},
author = {Srinivas, Suraj and Fleuret, Francois},
biburl = {https://www.bibsonomy.org/bibtex/29e44f49e81671136a9753115fc5c16e4/analyst},
keywords = {2019 arxiv deep-learning paper},
note = {cite arxiv:1905.00780},
timestamp = {2019-05-20T08:14:26.000+0200},
title = {Full-Jacobian Representation of Neural Networks},
url = {http://arxiv.org/abs/1905.00780},
year = 2019
}