We present an unsupervised learning framework for the task of monocular depth and camera motion estimation from unstructured video sequences. In common with recent work [10, 14, 16], we use an end-to-end learning approach with view synthesis as the supervisory signal. In contrast to the previous work, our method is completely unsupervised, requiring only monocular video sequences for training. Our method uses single-view depth and multiview pose networks, with a loss based on warping nearby views to the target using the computed depth and pose. The networks are thus coupled by the loss during training, but can be applied independently at test time. Empirical evaluation on the KITTI dataset demonstrates the effectiveness of our approach: 1) monocular depth performs comparably with supervised methods that use either ground-truth pose or depth for training, and 2) pose estimation performs favorably compared to established SLAM systems under comparable input settings.
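To make the supervisory signal concrete, the following is a minimal NumPy sketch of a view-synthesis loss of the kind described in the abstract: the predicted target depth and the relative camera pose are used to warp a nearby source frame into the target view, and the photometric difference between the warped and actual target images drives learning. The function names (inverse_warp, photometric_loss) and the nearest-neighbour sampling are illustrative assumptions, not the authors' implementation; the paper's networks use differentiable (bilinear) sampling so the loss can be backpropagated to both the depth and pose networks.

import numpy as np

def inverse_warp(src_img, tgt_depth, K, T):
    """Warp a source image into the target frame using the target depth map
    and the relative pose T (4x4, target -> source). Nearest-neighbour
    sampling is used here for brevity only."""
    H, W = tgt_depth.shape
    K_inv = np.linalg.inv(K)

    # Pixel grid of the target view in homogeneous coordinates.
    u, v = np.meshgrid(np.arange(W), np.arange(H))
    pix = np.stack([u, v, np.ones_like(u)], axis=0).reshape(3, -1).astype(np.float64)

    # Back-project target pixels to 3-D points using the predicted depth.
    cam_pts = (K_inv @ pix) * tgt_depth.reshape(1, -1)
    cam_pts_h = np.vstack([cam_pts, np.ones((1, cam_pts.shape[1]))])

    # Transform into the source camera frame and project with the intrinsics.
    src_pts = K @ (T @ cam_pts_h)[:3]
    depth_src = np.clip(src_pts[2], 1e-6, None)
    src_u = src_pts[0] / depth_src
    src_v = src_pts[1] / depth_src

    # Sample the source image at the projected coordinates.
    src_u = np.clip(np.round(src_u).astype(int), 0, W - 1)
    src_v = np.clip(np.round(src_v).astype(int), 0, H - 1)
    return src_img[src_v, src_u].reshape(H, W, -1)

def photometric_loss(tgt_img, src_img, tgt_depth, K, T):
    """Mean absolute difference between the target view and the source view
    warped into the target frame -- the self-supervised signal that couples
    the depth and pose networks during training."""
    warped = inverse_warp(src_img, tgt_depth, K, T)
    return np.abs(tgt_img.astype(np.float64) - warped).mean()

Because this loss compares only pixel intensities between real and synthesized views, no ground-truth depth or pose is needed; the depth and pose networks are supervised jointly through it during training, which is why they can be applied independently at test time.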
%0 Conference Paper
%1 2017-zhou
%A Zhou, Tinghui
%A Brown, Matthew
%A Snavely, Noah
%A Lowe, David G.
%B 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)
%D 2017
%K depth monocular self-supervised sfm sfm-learner video zhou
%P 6612-6619
%R 10.1109/CVPR.2017.700
%T Unsupervised Learning of Depth and Ego-Motion from Video
%U https://ieeexplore.ieee.org/document/8100183/
%X We present an unsupervised learning framework for the task of monocular depth and camera motion estimation from unstructured video sequences. In common with recent work [10, 14, 16], we use an end-to-end learning approach with view synthesis as the supervisory signal. In contrast to the previous work, our method is completely unsupervised, requiring only monocular video sequences for training. Our method uses single-view depth and multiview pose networks, with a loss based on warping nearby views to the target using the computed depth and pose. The networks are thus coupled by the loss during training, but can be applied independently at test time. Empirical evaluation on the KITTI dataset demonstrates the effectiveness of our approach: 1) monocular depth performs comparably with supervised methods that use either ground-truth pose or depth for training, and 2) pose estimation performs favorably compared to established SLAM systems under comparable input settings.
@inproceedings{2017-zhou,
abstract = {We present an unsupervised learning framework for the task of monocular depth and camera motion estimation from unstructured video sequences. In common with recent work [10, 14, 16], we use an end-to-end learning approach with view synthesis as the supervisory signal. In contrast to the previous work, our method is completely unsupervised, requiring only monocular video sequences for training. Our method uses single-view depth and multiview pose networks, with a loss based on warping nearby views to the target using the computed depth and pose. The networks are thus coupled by the loss during training, but can be applied independently at test time. Empirical evaluation on the KITTI dataset demonstrates the effectiveness of our approach: 1) monocular depth performs comparably with supervised methods that use either ground-truth pose or depth for training, and 2) pose estimation performs favorably compared to established SLAM systems under comparable input settings.},
added-at = {2021-07-06T08:42:53.000+0200},
author = {Zhou, Tinghui and Brown, Matthew and Snavely, Noah and Lowe, David G.},
biburl = {https://www.bibsonomy.org/bibtex/25f9139e33fa865d36ce885d590930544/pkoch},
booktitle = {2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
doi = {10.1109/CVPR.2017.700},
issn = {1063-6919},
keywords = {depth monocular self-supervised sfm sfm-learner video zhou},
month = {July},
pages = {6612-6619},
timestamp = {2021-07-06T08:42:53.000+0200},
title = {Unsupervised Learning of Depth and Ego-Motion from Video},
url = {https://ieeexplore.ieee.org/document/8100183/},
year = 2017
}