We present a new, publicly-available image dataset generated by the NVIDIA
Deep Learning Data Synthesizer intended for use in object detection, pose
estimation, and tracking applications. This dataset contains 144k stereo image
pairs that synthetically combine 18 camera viewpoints of three photorealistic
virtual environments with up to 10 objects (chosen randomly from the 21 object
models of the YCB dataset [1]) and flying distractors. Object and camera pose,
scene lighting, and quantity of objects and distractors were randomized. Each
provided view includes RGB, depth, segmentation, and surface normal images, all
pixel level. We describe our approach for domain randomization and provide
insight into the decisions that produced the dataset.
Description
SIDOD: A Synthetic Image Dataset for 3D Object Pose Recognition with Distractors
%0 Journal Article
%1 journals/corr/abs-2008-05955
%A Jalal, Mona
%A Spjut, Josef
%A Boudaoud, Ben
%A Betke, Margrit
%D 2019
%K computer vision
%R 10.1109/CVPRW.2019.00063
%T SIDOD: A Synthetic Image Dataset for 3D Object Pose Recognition with
Distractors
%U http://arxiv.org/abs/2008.05955
%X We present a new, publicly-available image dataset generated by the NVIDIA
Deep Learning Data Synthesizer intended for use in object detection, pose
estimation, and tracking applications. This dataset contains 144k stereo image
pairs that synthetically combine 18 camera viewpoints of three photorealistic
virtual environments with up to 10 objects (chosen randomly from the 21 object
models of the YCB dataset [1]) and flying distractors. Object and camera pose,
scene lighting, and quantity of objects and distractors were randomized. Each
provided view includes RGB, depth, segmentation, and surface normal images, all
pixel level. We describe our approach for domain randomization and provide
insight into the decisions that produced the dataset.
@inproceedings{journals/corr/abs-2008-05955,
  abstract    = {We present a new, publicly-available image dataset generated by the NVIDIA
Deep Learning Data Synthesizer intended for use in object detection, pose
estimation, and tracking applications. This dataset contains 144k stereo image
pairs that synthetically combine 18 camera viewpoints of three photorealistic
virtual environments with up to 10 objects (chosen randomly from the 21 object
models of the YCB dataset [1]) and flying distractors. Object and camera pose,
scene lighting, and quantity of objects and distractors were randomized. Each
provided view includes RGB, depth, segmentation, and surface normal images, all
pixel level. We describe our approach for domain randomization and provide
insight into the decisions that produced the dataset.},
  added-at    = {2020-09-30T08:18:28.000+0200},
  author      = {Jalal, Mona and Spjut, Josef and Boudaoud, Ben and Betke, Margrit},
  biburl      = {https://www.bibsonomy.org/bibtex/232b95f32d160135fa5c0ae96b7fdffbe/monajalal},
  booktitle   = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition Workshops ({CVPRW})},
  description = {SIDOD: A Synthetic Image Dataset for 3D Object Pose Recognition with Distractors},
  doi         = {10.1109/CVPRW.2019.00063},
  eprint      = {2008.05955},
  eprinttype  = {arXiv},
  interhash   = {b680a38e33ddb4a8f70b907befeabb8a},
  intrahash   = {32b95f32d160135fa5c0ae96b7fdffbe},
  keywords    = {computer vision},
  note        = {3 pages, 4 figures, 1 table},
  timestamp   = {2020-09-30T08:18:55.000+0200},
  title       = {{SIDOD}: A Synthetic Image Dataset for {3D} Object Pose Recognition with Distractors},
  url         = {http://arxiv.org/abs/2008.05955},
  year        = {2019},
}