We theoretically analyse the limits of robustness to test-time adversarial
and noisy examples in classification. Our work focuses on deriving bounds which
uniformly apply to all classifiers (i.e., all measurable functions from features
to labels) for a given problem. Our contributions are three-fold. (1) In the
classical framework of adversarial attacks, we use optimal transport theory to
derive variational formulae for the Bayes-optimal error a classifier can make
on a given classification problem, subject to adversarial attacks. We also
propose a simple algorithm for computing the corresponding universal (i.e.,
classifier-independent) adversarial attacks, based on maximal matching on
bipartite graphs. (2) We derive explicit lower bounds on the Bayes-optimal
error for popular distance-based attacks. These bounds are
universal in the sense that they depend on the geometry of the
class-conditional distributions of the data, but not on a particular
classifier. This is in contrast with the existing literature, wherein
adversarial vulnerability of classifiers is derived as a consequence of nonzero
ordinary test error. (3) For our third contribution, we study robustness to
random noise corruption, wherein the attacker (or nature) is allowed to inject
random noise into examples at test time. We establish nonlinear data-processing
inequalities induced by such corruptions, and use them to obtain lower bounds
on the Bayes-optimal error.
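
To unpack the abstract in slightly more concrete terms: for a balanced two-class problem with class-conditional distributions P_0 and P_1, the classical Bayes-optimal error is governed by their total-variation distance, and total variation itself has a coupling (optimal-transport) characterisation. The display below is only meant to convey the flavour of the variational formulae the abstract refers to; the paper's exact statements, assumptions and notation may differ.

R^{*} \;=\; \tfrac{1}{2}\bigl(1 - \mathrm{TV}(P_0, P_1)\bigr)
      \;=\; \tfrac{1}{2}\,\sup_{\gamma \in \Pi(P_0, P_1)} \Pr_{(X_0, X_1)\sim\gamma}\bigl[X_0 = X_1\bigr],

and, for an attacker with budget \varepsilon in a norm-induced metric d (so that any pair of points at distance at most 2\varepsilon can be pushed onto a common point),

R^{*}_{\varepsilon} \;\ge\; \tfrac{1}{2}\,\sup_{\gamma \in \Pi(P_0, P_1)} \Pr_{(X_0, X_1)\sim\gamma}\bigl[d(X_0, X_1) \le 2\varepsilon\bigr].

On an empirical sample, a coupling of the two class-conditional empirical measures reduces to a matching problem, which is presumably what the abstract's bipartite-matching algorithm exploits. The sketch below is a minimal, hypothetical illustration of that primitive, not the paper's algorithm: matching_lower_bound is an invented helper name, the Euclidean norm and the 2*eps pairing rule are assumptions, and the returned value is the standard matching-based lower bound m/(n0+n1) on the adversarial error of any classifier on the pooled sample.

import numpy as np

def matching_lower_bound(X0, X1, eps):
    # X0, X1: arrays of shape (n0, d) and (n1, d) holding samples from the two classes.
    # eps: attack budget; two points at distance <= 2*eps can be pushed onto a common
    # point by an eps-bounded attacker, forcing any classifier to err on one of them.
    n0, n1 = len(X0), len(X1)
    # Pairwise Euclidean distances between the two classes (illustrative choice of metric).
    dists = np.linalg.norm(X0[:, None, :] - X1[None, :, :], axis=-1)
    # Bipartite graph: class-0 point i is adjacent to class-1 point j iff they can collide.
    adj = [np.nonzero(dists[i] <= 2 * eps)[0].tolist() for i in range(n0)]

    match_of_right = [-1] * n1  # class-1 point j is matched to class-0 point match_of_right[j]

    def try_augment(i, seen):
        # Kuhn's augmenting-path step for maximum bipartite matching.
        for j in adj[i]:
            if not seen[j]:
                seen[j] = True
                if match_of_right[j] == -1 or try_augment(match_of_right[j], seen):
                    match_of_right[j] = i
                    return True
        return False

    matched = sum(try_augment(i, [False] * n1) for i in range(n0))
    # Every matched pair forces at least one mistake under some eps-perturbation, so no
    # classifier can achieve adversarial error below matched / (n0 + n1) on this sample.
    return matched / (n0 + n1)

For example, on two well-separated Gaussian samples the bound stays near zero for small eps and climbs towards one half as eps grows, the regime in which the attacker can make the two classes overlap completely.
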
@article{dohmatob2020universal,
author = {Dohmatob, Elvis},
keywords = {adversarial bounds readings},
note = {cite arxiv:2006.09989},
title = {Universal Lower-Bounds on Classification Error under Adversarial Attacks
and Random Corruption},
url = {http://arxiv.org/abs/2006.09989},
year = 2020
}