State-of-the-art object detection networks depend on region proposal
algorithms to hypothesize object locations. Advances like SPPnet and Fast R-CNN
have reduced the running time of these detection networks, exposing region
proposal computation as a bottleneck. In this work, we introduce a Region
Proposal Network (RPN) that shares full-image convolutional features with the
detection network, thus enabling nearly cost-free region proposals. An RPN is a
fully convolutional network that simultaneously predicts object bounds and
objectness scores at each position. The RPN is trained end-to-end to generate
high-quality region proposals, which are used by Fast R-CNN for detection. We
further merge RPN and Fast R-CNN into a single network by sharing their
convolutional features---using the recently popular terminology of neural
networks with 'attention' mechanisms, the RPN component tells the unified
network where to look. For the very deep VGG-16 model, our detection system has
a frame rate of 5fps (including all steps) on a GPU, while achieving
state-of-the-art object detection accuracy on PASCAL VOC 2007, 2012, and MS
COCO datasets with only 300 proposals per image. In ILSVRC and COCO 2015
competitions, Faster R-CNN and RPN are the foundations of the 1st-place winning
entries in several tracks. Code has been made publicly available.
Description
[1506.01497] Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks
%0 Generic
%1 ren2015faster
%A Ren, Shaoqing
%A He, Kaiming
%A Girshick, Ross
%A Sun, Jian
%D 2015
%K 2015 computer-vision detection
%R 10.1109/TPAMI.2016.2577031
%T Faster R-CNN: Towards Real-Time Object Detection with Region Proposal
Networks
%U http://arxiv.org/abs/1506.01497
%X State-of-the-art object detection networks depend on region proposal
algorithms to hypothesize object locations. Advances like SPPnet and Fast R-CNN
have reduced the running time of these detection networks, exposing region
proposal computation as a bottleneck. In this work, we introduce a Region
Proposal Network (RPN) that shares full-image convolutional features with the
detection network, thus enabling nearly cost-free region proposals. An RPN is a
fully convolutional network that simultaneously predicts object bounds and
objectness scores at each position. The RPN is trained end-to-end to generate
high-quality region proposals, which are used by Fast R-CNN for detection. We
further merge RPN and Fast R-CNN into a single network by sharing their
convolutional features---using the recently popular terminology of neural
networks with 'attention' mechanisms, the RPN component tells the unified
network where to look. For the very deep VGG-16 model, our detection system has
a frame rate of 5fps (including all steps) on a GPU, while achieving
state-of-the-art object detection accuracy on PASCAL VOC 2007, 2012, and MS
COCO datasets with only 300 proposals per image. In ILSVRC and COCO 2015
competitions, Faster R-CNN and RPN are the foundations of the 1st-place winning
entries in several tracks. Code has been made publicly available.
% Faster R-CNN, arXiv preprint 1506.01497 (BibSonomy export, cleaned up).
% NOTE(review): the DOI below looks like a TPAMI journal DOI rather than the
% preprint's -- confirm which version is meant to be cited.
@misc{ren2015faster,
  author        = {Ren, Shaoqing and He, Kaiming and Girshick, Ross and Sun, Jian},
  title         = {Faster {R-CNN}: Towards Real-Time Object Detection with Region Proposal Networks},
  year          = {2015},
  eprint        = {1506.01497},
  archiveprefix = {arXiv},
  doi           = {10.1109/TPAMI.2016.2577031},
  url           = {http://arxiv.org/abs/1506.01497},
  note          = {Extended tech report},
  abstract      = {State-of-the-art object detection networks depend on region proposal
algorithms to hypothesize object locations. Advances like SPPnet and Fast R-CNN
have reduced the running time of these detection networks, exposing region
proposal computation as a bottleneck. In this work, we introduce a Region
Proposal Network (RPN) that shares full-image convolutional features with the
detection network, thus enabling nearly cost-free region proposals. An RPN is a
fully convolutional network that simultaneously predicts object bounds and
objectness scores at each position. The RPN is trained end-to-end to generate
high-quality region proposals, which are used by Fast R-CNN for detection. We
further merge RPN and Fast R-CNN into a single network by sharing their
convolutional features---using the recently popular terminology of neural
networks with `attention' mechanisms, the RPN component tells the unified
network where to look. For the very deep VGG-16 model, our detection system has
a frame rate of 5fps (including all steps) on a GPU, while achieving
state-of-the-art object detection accuracy on PASCAL VOC 2007, 2012, and MS
COCO datasets with only 300 proposals per image. In ILSVRC and COCO 2015
competitions, Faster R-CNN and RPN are the foundations of the 1st-place winning
entries in several tracks. Code has been made publicly available.},
  keywords      = {2015 computer-vision detection},
  description   = {[1506.01497] Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks},
  added-at      = {2020-06-10T16:08:30.000+0200},
  timestamp     = {2020-06-10T16:13:35.000+0200},
  biburl        = {https://www.bibsonomy.org/bibtex/2f69d04b7e1feab155930f35a4622142f/analyst},
  interhash     = {247960257b8b9964e0c61d7b53bdcfdc},
  intrahash     = {f69d04b7e1feab155930f35a4622142f},
}