A basic operation in Convolutional Neural Networks (CNNs) is spatial resizing
of feature maps. This is done either by strided convolution (downscaling) or
transposed convolution (upscaling). Such operations are limited to a fixed
filter moving at predetermined integer steps (strides). Spatial sizes of
consecutive layers are related by integer scale factors, predetermined at
architectural design, and remain fixed throughout training and inference time.
We propose a generalization of the common Conv-layer, from a discrete layer to
a Continuous Convolution (CC) Layer. CC Layers naturally extend Conv-layers by
representing the filter as a learned continuous function over sub-pixel
coordinates. This allows learnable and principled resizing of feature maps, to
any size, dynamically and consistently across scales. Once trained, the CC
layer can be used to output any scale/size chosen at inference time. The scale
can be non-integer and differ between the axes. CC gives rise to new freedoms
for architectural design, such as dynamic layer shapes at inference time, or
gradual architectures where the size changes by a small factor at each layer.
This gives rise to many desired CNN properties, new architectural design
capabilities, and useful applications. We further show that current Conv-layers
suffer from inherent misalignments, which are ameliorated by CC layers.
%0 Generic
%1 shocher2020discrete
%A Shocher, Assaf
%A Feinstein, Ben
%A Haim, Niv
%A Irani, Michal
%D 2020
%K cnn continuous todo:read
%T From Discrete to Continuous Convolution Layers
%U http://arxiv.org/abs/2006.11120
%X A basic operation in Convolutional Neural Networks (CNNs) is spatial resizing
of feature maps. This is done either by strided convolution (downscaling) or
transposed convolution (upscaling). Such operations are limited to a fixed
filter moving at predetermined integer steps (strides). Spatial sizes of
consecutive layers are related by integer scale factors, predetermined at
architectural design, and remain fixed throughout training and inference time.
We propose a generalization of the common Conv-layer, from a discrete layer to
a Continuous Convolution (CC) Layer. CC Layers naturally extend Conv-layers by
representing the filter as a learned continuous function over sub-pixel
coordinates. This allows learnable and principled resizing of feature maps, to
any size, dynamically and consistently across scales. Once trained, the CC
layer can be used to output any scale/size chosen at inference time. The scale
can be non-integer and differ between the axes. CC gives rise to new freedoms
for architectural design, such as dynamic layer shapes at inference time, or
gradual architectures where the size changes by a small factor at each layer.
This gives rise to many desired CNN properties, new architectural design
capabilities, and useful applications. We further show that current Conv-layers
suffer from inherent misalignments, which are ameliorated by CC layers.
@misc{shocher2020discrete,
  abstract      = {A basic operation in Convolutional Neural Networks (CNNs) is spatial resizing
of feature maps. This is done either by strided convolution (downscaling) or
transposed convolution (upscaling). Such operations are limited to a fixed
filter moving at predetermined integer steps (strides). Spatial sizes of
consecutive layers are related by integer scale factors, predetermined at
architectural design, and remain fixed throughout training and inference time.
We propose a generalization of the common Conv-layer, from a discrete layer to
a Continuous Convolution (CC) Layer. CC Layers naturally extend Conv-layers by
representing the filter as a learned continuous function over sub-pixel
coordinates. This allows learnable and principled resizing of feature maps, to
any size, dynamically and consistently across scales. Once trained, the CC
layer can be used to output any scale/size chosen at inference time. The scale
can be non-integer and differ between the axes. CC gives rise to new freedoms
for architectural design, such as dynamic layer shapes at inference time, or
gradual architectures where the size changes by a small factor at each layer.
This gives rise to many desired CNN properties, new architectural design
capabilities, and useful applications. We further show that current Conv-layers
suffer from inherent misalignments, which are ameliorated by CC layers.},
  added-at      = {2021-07-01T09:42:15.000+0200},
  archiveprefix = {arXiv},
  author        = {Shocher, Assaf and Feinstein, Ben and Haim, Niv and Irani, Michal},
  biburl        = {https://www.bibsonomy.org/bibtex/27c4221799a59fc351d3f83d348aab90e/annakrause},
  description   = {2006.11120.pdf},
  eprint        = {2006.11120},
  interhash     = {e08171710d042158370451959c75a9bb},
  intrahash     = {7c4221799a59fc351d3f83d348aab90e},
  keywords      = {cnn continuous todo:read},
  note          = {cite arxiv:2006.11120},
  timestamp     = {2021-07-01T09:42:15.000+0200},
  title         = {From Discrete to Continuous Convolution Layers},
  url           = {https://arxiv.org/abs/2006.11120},
  year          = {2020}
}