This paper presents a new algorithm for the automatic recognition
of object classes from images (categorization). Compact and yet discriminative
appearance-based object class models are automatically learned from
a set of training images. The method is simple and extremely fast,
making it suitable for many applications such as semantic image retrieval,
Web search, and interactive image editing. It classifies a region
according to the proportions of different visual words (clusters
in feature space). The specific visual words and the typical proportions
in each object are learned from a segmented training set. The main
contribution of this paper is twofold: i) an optimally compact visual
dictionary is learned by pair-wise merging of visual words from an
initially large dictionary. The final visual words are described
by GMMs. ii) A novel statistical measure of discrimination is proposed
which is optimized by each merge operation. High classification accuracy
is demonstrated for nine object classes on photographs of real objects
viewed under general lighting conditions, poses and viewpoints. The
set of test images used for validation comprise: i) photographs acquired
by us, ii) images from the Web and iii) images from the recently
released Pascal dataset. The proposed algorithm performs well on
both texture-rich objects (e.g. grass, sky, trees) and structure-rich
ones (e.g. cars, bikes, planes).
%0 Journal Article
%1 Winn2005
%A Winn, J.
%A Criminisi, A.
%A Minka, T.
%D 2005
%J Tenth IEEE International Conference on Computer Vision (ICCV 2005)
%K (artificial appearance-based categorization, class classification, dictionary, discriminative image intelligence), learning models, object recognition recognition, universal visual words
%P 1800-1807
%R 10.1109/ICCV.2005.171
%T Object categorization by learned universal visual dictionary
%V 2
%X This paper presents a new algorithm for the automatic recognition
of object classes from images (categorization). Compact and yet discriminative
appearance-based object class models are automatically learned from
a set of training images. The method is simple and extremely fast,
making it suitable for many applications such as semantic image retrieval,
Web search, and interactive image editing. It classifies a region
according to the proportions of different visual words (clusters
in feature space). The specific visual words and the typical proportions
in each object are learned from a segmented training set. The main
contribution of this paper is twofold: i) an optimally compact visual
dictionary is learned by pair-wise merging of visual words from an
initially large dictionary. The final visual words are described
by GMMs. ii) A novel statistical measure of discrimination is proposed
which is optimized by each merge operation. High classification accuracy
is demonstrated for nine object classes on photographs of real objects
viewed under general lighting conditions, poses and viewpoints. The
set of test images used for validation comprise: i) photographs acquired
by us, ii) images from the Web and iii) images from the recently
released Pascal dataset. The proposed algorithm performs well on
both texture-rich objects (e.g. grass, sky, trees) and structure-rich
ones (e.g. cars, bikes, planes).
@inproceedings{Winn2005,
  abstract  = {This paper presents a new algorithm for the automatic recognition
of object classes from images (categorization). Compact and yet discriminative
appearance-based object class models are automatically learned from
a set of training images. The method is simple and extremely fast,
making it suitable for many applications such as semantic image retrieval,
Web search, and interactive image editing. It classifies a region
according to the proportions of different visual words (clusters
in feature space). The specific visual words and the typical proportions
in each object are learned from a segmented training set. The main
contribution of this paper is twofold: i) an optimally compact visual
dictionary is learned by pair-wise merging of visual words from an
initially large dictionary. The final visual words are described
by GMMs. ii) A novel statistical measure of discrimination is proposed
which is optimized by each merge operation. High classification accuracy
is demonstrated for nine object classes on photographs of real objects
viewed under general lighting conditions, poses and viewpoints. The
set of test images used for validation comprise: i) photographs acquired
by us, ii) images from the Web and iii) images from the recently
released Pascal dataset. The proposed algorithm performs well on
both texture-rich objects (e.g. grass, sky, trees) and structure-rich
ones (e.g. cars, bikes, planes).},
  added-at  = {2009-09-12T19:19:34.000+0200},
  author    = {Winn, J. and Criminisi, A. and Minka, T.},
  biburl    = {https://www.bibsonomy.org/bibtex/2479e2efe3faefedadd0978bacdf8b3ec/mozaher},
  booktitle = {Tenth {IEEE} International Conference on Computer Vision ({ICCV} 2005)},
  doi       = {10.1109/ICCV.2005.171},
  file      = {01544935.pdf:Winn2005.pdf:PDF},
  interhash = {a585b5dab8401171bc19736909e30368},
  intrahash = {479e2efe3faefedadd0978bacdf8b3ec},
  issn      = {1550-5499},
  keywords  = {object categorization, visual dictionary, appearance-based models, classification, discriminative learning, artificial intelligence, universal visual words},
  month     = oct,
  owner     = {Mozaher},
  pages     = {1800--1807},
  publisher = {IEEE},
  timestamp = {2009-09-12T19:19:43.000+0200},
  title     = {Object Categorization by Learned Universal Visual Dictionary},
  volume    = {2},
  year      = {2005},
}