Real-world large-scale datasets are heteroskedastic and imbalanced -- labels
have varying levels of uncertainty and label distributions are long-tailed.
Heteroskedasticity and imbalance challenge deep learning algorithms due to the
difficulty of distinguishing among mislabeled, ambiguous, and rare examples.
Addressing heteroskedasticity and imbalance simultaneously is under-explored.
We propose a data-dependent regularization technique for heteroskedastic
datasets that regularizes different regions of the input space differently.
Inspired by the theoretical derivation of the optimal regularization strength
in a one-dimensional nonparametric classification setting, our approach
adaptively regularizes the data points in higher-uncertainty, lower-density
regions more heavily. We test our method on several benchmark tasks, including
a real-world heteroskedastic and imbalanced dataset, WebVision. Our experiments
corroborate our theory and demonstrate a significant improvement over other
methods in noise-robust deep learning.
Beschreibung
[2006.15766] Heteroskedastic and Imbalanced Deep Learning with Adaptive Regularization
%0 Generic
%1 cao2020heteroskedastic
%A Cao, Kaidi
%A Chen, Yining
%A Lu, Junwei
%A Arechiga, Nikos
%A Gaidon, Adrien
%A Ma, Tengyu
%D 2020
%K 2020 dataset deep-learning
%T Heteroskedastic and Imbalanced Deep Learning with Adaptive
Regularization
%U http://arxiv.org/abs/2006.15766
%X Real-world large-scale datasets are heteroskedastic and imbalanced -- labels
have varying levels of uncertainty and label distributions are long-tailed.
Heteroskedasticity and imbalance challenge deep learning algorithms due to the
difficulty of distinguishing among mislabeled, ambiguous, and rare examples.
Addressing heteroskedasticity and imbalance simultaneously is under-explored.
We propose a data-dependent regularization technique for heteroskedastic
datasets that regularizes different regions of the input space differently.
Inspired by the theoretical derivation of the optimal regularization strength
in a one-dimensional nonparametric classification setting, our approach
adaptively regularizes the data points in higher-uncertainty, lower-density
regions more heavily. We test our method on several benchmark tasks, including
a real-world heteroskedastic and imbalanced dataset, WebVision. Our experiments
corroborate our theory and demonstrate a significant improvement over other
methods in noise-robust deep learning.
% arXiv preprint record. The arXiv identifier lives in eprint/archiveprefix
% (machine-readable, rendered by eprint-aware styles) rather than in a
% free-text note. The added-at, biburl, description, interhash, intrahash and
% timestamp fields are BibSonomy bookkeeping; unknown fields are ignored by
% bibliography styles.
@misc{cao2020heteroskedastic,
  author        = {Cao, Kaidi and Chen, Yining and Lu, Junwei and Arechiga, Nikos and Gaidon, Adrien and Ma, Tengyu},
  title         = {Heteroskedastic and Imbalanced Deep Learning with Adaptive Regularization},
  year          = {2020},
  eprint        = {2006.15766},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/2006.15766},
  keywords      = {2020 dataset deep-learning},
  abstract      = {Real-world large-scale datasets are heteroskedastic and imbalanced -- labels
have varying levels of uncertainty and label distributions are long-tailed.
Heteroskedasticity and imbalance challenge deep learning algorithms due to the
difficulty of distinguishing among mislabeled, ambiguous, and rare examples.
Addressing heteroskedasticity and imbalance simultaneously is under-explored.
We propose a data-dependent regularization technique for heteroskedastic
datasets that regularizes different regions of the input space differently.
Inspired by the theoretical derivation of the optimal regularization strength
in a one-dimensional nonparametric classification setting, our approach
adaptively regularizes the data points in higher-uncertainty, lower-density
regions more heavily. We test our method on several benchmark tasks, including
a real-world heteroskedastic and imbalanced dataset, WebVision. Our experiments
corroborate our theory and demonstrate a significant improvement over other
methods in noise-robust deep learning.},
  description   = {[2006.15766] Heteroskedastic and Imbalanced Deep Learning with Adaptive Regularization},
  added-at      = {2020-07-06T12:17:42.000+0200},
  timestamp     = {2020-07-06T12:17:42.000+0200},
  biburl        = {https://www.bibsonomy.org/bibtex/2c5d4f8f0ca2b46a38d1695bf8c0ef946/analyst},
  interhash     = {dee5a283ee5644563599e1d4ba51b0a7},
  intrahash     = {c5d4f8f0ca2b46a38d1695bf8c0ef946},
}