We present the first learning-based visual odometry (VO) model, which
generalizes to multiple datasets and real-world scenarios and outperforms
geometry-based methods in challenging scenes. We achieve this by leveraging the
SLAM dataset TartanAir, which provides a large amount of diverse synthetic data
in challenging environments. Furthermore, to make our VO model generalize
across datasets, we propose an up-to-scale loss function and incorporate the
camera intrinsic parameters into the model. Experiments show that a single
model, TartanVO, trained only on synthetic data, without any finetuning, can be
generalized to real-world datasets such as KITTI and EuRoC, demonstrating
significant advantages over the geometry-based methods on challenging
trajectories. Our code is available at https://github.com/castacks/tartanvo.
Description
[2011.00359] TartanVO: A Generalizable Learning-based VO
%0 Generic
%1 wang2020tartanvo
%A Wang, Wenshan
%A Hu, Yaoyu
%A Scherer, Sebastian
%D 2020
%K 2020 3D multi-view
%T TartanVO: A Generalizable Learning-based VO
%U http://arxiv.org/abs/2011.00359
%X We present the first learning-based visual odometry (VO) model, which
generalizes to multiple datasets and real-world scenarios and outperforms
geometry-based methods in challenging scenes. We achieve this by leveraging the
SLAM dataset TartanAir, which provides a large amount of diverse synthetic data
in challenging environments. Furthermore, to make our VO model generalize
across datasets, we propose an up-to-scale loss function and incorporate the
camera intrinsic parameters into the model. Experiments show that a single
model, TartanVO, trained only on synthetic data, without any finetuning, can be
generalized to real-world datasets such as KITTI and EuRoC, demonstrating
significant advantages over the geometry-based methods on challenging
trajectories. Our code is available at https://github.com/castacks/tartanvo.
@misc{wang2020tartanvo,
  abstract      = {We present the first learning-based visual odometry (VO) model, which
generalizes to multiple datasets and real-world scenarios and outperforms
geometry-based methods in challenging scenes. We achieve this by leveraging the
SLAM dataset TartanAir, which provides a large amount of diverse synthetic data
in challenging environments. Furthermore, to make our VO model generalize
across datasets, we propose an up-to-scale loss function and incorporate the
camera intrinsic parameters into the model. Experiments show that a single
model, TartanVO, trained only on synthetic data, without any finetuning, can be
generalized to real-world datasets such as KITTI and EuRoC, demonstrating
significant advantages over the geometry-based methods on challenging
trajectories. Our code is available at https://github.com/castacks/tartanvo.},
  added-at      = {2021-03-16T07:14:23.000+0100},
  archiveprefix = {arXiv},
  author        = {Wang, Wenshan and Hu, Yaoyu and Scherer, Sebastian},
  biburl        = {https://www.bibsonomy.org/bibtex/24ded63044bd88b09cf405750eed13c9e/analyst},
  description   = {[2011.00359] TartanVO: A Generalizable Learning-based VO},
  eprint        = {2011.00359},
  interhash     = {5d8f430fc7254d32dc5a6de09362f083},
  intrahash     = {4ded63044bd88b09cf405750eed13c9e},
  keywords      = {2020 3D multi-view},
  note          = {cite arxiv:2011.00359},
  timestamp     = {2021-03-16T07:14:23.000+0100},
  title         = {{TartanVO}: A Generalizable Learning-based {VO}},
  url           = {http://arxiv.org/abs/2011.00359},
  year          = {2020}
}