We build new test sets for the CIFAR-10 and ImageNet datasets. Both
benchmarks have been the focus of intense research for almost a decade, raising
the danger of overfitting to excessively re-used test sets. By closely
following the original dataset creation processes, we test to what extent
current classification models generalize to new data. We evaluate a broad range
of models and find accuracy drops of 3% - 15% on CIFAR-10 and 11% - 14% on
ImageNet. However, accuracy gains on the original test sets translate to larger
gains on the new test sets. Our results suggest that the accuracy drops are not
caused by adaptivity, but by the models' inability to generalize to slightly
"harder" images than those found in the original test sets.
Description
[1902.10811] Do ImageNet Classifiers Generalize to ImageNet?
%0 Journal Article
%1 recht2019imagenet
%A Recht, Benjamin
%A Roelofs, Rebecca
%A Schmidt, Ludwig
%A Shankar, Vaishaal
%D 2019
%K adversarial bounds generalization machine-learning probability stats theory
%T Do ImageNet Classifiers Generalize to ImageNet?
%U http://arxiv.org/abs/1902.10811
%X We build new test sets for the CIFAR-10 and ImageNet datasets. Both
benchmarks have been the focus of intense research for almost a decade, raising
the danger of overfitting to excessively re-used test sets. By closely
following the original dataset creation processes, we test to what extent
current classification models generalize to new data. We evaluate a broad range
of models and find accuracy drops of 3% - 15% on CIFAR-10 and 11% - 14% on
ImageNet. However, accuracy gains on the original test sets translate to larger
gains on the new test sets. Our results suggest that the accuracy drops are not
caused by adaptivity, but by the models' inability to generalize to slightly
"harder" images than those found in the original test sets.
@misc{recht2019imagenet,
  abstract = {We build new test sets for the CIFAR-10 and ImageNet datasets. Both
benchmarks have been the focus of intense research for almost a decade, raising
the danger of overfitting to excessively re-used test sets. By closely
following the original dataset creation processes, we test to what extent
current classification models generalize to new data. We evaluate a broad range
of models and find accuracy drops of 3\% - 15\% on CIFAR-10 and 11\% - 14\% on
ImageNet. However, accuracy gains on the original test sets translate to larger
gains on the new test sets. Our results suggest that the accuracy drops are not
caused by adaptivity, but by the models' inability to generalize to slightly
``harder'' images than those found in the original test sets.},
  added-at = {2019-05-31T16:49:48.000+0200},
  archiveprefix = {arXiv},
  author = {Recht, Benjamin and Roelofs, Rebecca and Schmidt, Ludwig and Shankar, Vaishaal},
  biburl = {https://www.bibsonomy.org/bibtex/26725744e71af5a5025eba66976bf8838/kirk86},
  description = {[1902.10811] Do ImageNet Classifiers Generalize to ImageNet?},
  eprint = {1902.10811},
  interhash = {c5ec3a9e185982372a5fdd28d6b567b2},
  intrahash = {6725744e71af5a5025eba66976bf8838},
  keywords = {adversarial bounds generalization machine-learning probability stats theory},
  note = {cite arxiv:1902.10811},
  timestamp = {2020-03-02T02:31:10.000+0100},
  title = {Do {ImageNet} Classifiers Generalize to {ImageNet}?},
  url = {https://arxiv.org/abs/1902.10811},
  year = {2019}
}