Suppose you are given some data set drawn from an underlying probability distribution P and you want to estimate a "simple" subset S of input space such that the probability that a test point drawn from P lies outside of S equals some a priori specified value between 0 and 1. We propose a method to approach this problem by trying to estimate a function f that is positive on S and negative on the complement. The functional form of f is given by a kernel expansion in terms of a potentially small subset of the training data; it is regularized by controlling the length of the weight vector in an associated feature space. The expansion coefficients are found by solving a quadratic programming problem, which we do by carrying out sequential optimization over pairs of input patterns. We also provide a theoretical analysis of the statistical performance of our algorithm. The algorithm is a natural extension of the support vector algorithm to the case of unlabeled data.
%0 Journal Article
%1 neuralcomp2001:osvm
%A Schölkopf, Bernhard
%A Platt, John C.
%A Shawe-Taylor, John
%A Smola, Alex J.
%A Williamson, Robert C.
%D 2001
%J Neural Computation
%K machine osvm ssvm support vector
%N 7
%P 1443-1471
%T Estimating the Support of a High-Dimensional Distribution
%U http://neco.mitpress.org/cgi/content/abstract/13/7/1443
%V 13
%X Suppose you are given some data set drawn from an underlying probability distribution P and you want to estimate a "simple" subset S of input space such that the probability that a test point drawn from P lies outside of S equals some a priori specified value between 0 and 1. We propose a method to approach this problem by trying to estimate a function f that is positive on S and negative on the complement. The functional form of f is given by a kernel expansion in terms of a potentially small subset of the training data; it is regularized by controlling the length of the weight vector in an associated feature space. The expansion coefficients are found by solving a quadratic programming problem, which we do by carrying out sequential optimization over pairs of input patterns. We also provide a theoretical analysis of the statistical performance of our algorithm. The algorithm is a natural extension of the support vector algorithm to the case of unlabeled data.
% Schölkopf et al., Neural Computation 13(7), 2001 — one-class SVM / support estimation.
% Normalised: page-range dashes, umlaut in Schölkopf (classic-BibTeX escape), full
% journal name, single-brace title (so styles can recase), added DOI.
@article{neuralcomp2001:osvm,
  abstract  = {Suppose you are given some data set drawn from an underlying probability distribution P and you want to estimate a "simple" subset S of input space such that the probability that a test point drawn from P lies outside of S equals some a priori specified value between 0 and 1. We propose a method to approach this problem by trying to estimate a function f that is positive on S and negative on the complement. The functional form of f is given by a kernel expansion in terms of a potentially small subset of the training data; it is regularized by controlling the length of the weight vector in an associated feature space. The expansion coefficients are found by solving a quadratic programming problem, which we do by carrying out sequential optimization over pairs of input patterns. We also provide a theoretical analysis of the statistical performance of our algorithm. The algorithm is a natural extension of the support vector algorithm to the case of unlabeled data.},
  added-at  = {2007-04-28T16:01:00.000+0200},
  author    = {Sch{\"o}lkopf, Bernhard and Platt, John C. and Shawe-Taylor, John and Smola, Alex J. and Williamson, Robert C.},
  biburl    = {https://www.bibsonomy.org/bibtex/20d80edb797bed734f54d37ab199f0981/kzhou},
  doi       = {10.1162/089976601750264965},
  eprint    = {http://neco.mitpress.org/cgi/reprint/13/7/1443.pdf},
  interhash = {42fbb73b842381dbe4fd1c426be4118f},
  intrahash = {0d80edb797bed734f54d37ab199f0981},
  journal   = {Neural Computation},
  keywords  = {machine osvm ssvm support vector},
  number    = {7},
  pages     = {1443--1471},
  timestamp = {2007-04-29T14:52:31.000+0200},
  title     = {Estimating the Support of a High-Dimensional Distribution},
  url       = {http://neco.mitpress.org/cgi/content/abstract/13/7/1443},
  volume    = {13},
  year      = {2001}
}