D. Arthur and S. Vassilvitskii. SODA '07: Proceedings of the eighteenth annual ACM-SIAM symposium on Discrete algorithms, pages 1027--1035. Philadelphia, PA, USA, Society for Industrial and Applied Mathematics, (2007)
Abstract
The k-means method is a widely used clustering technique that seeks to minimize the average squared distance between points in the same cluster. Although it offers no accuracy guarantees, its simplicity and speed are very appealing in practice. By augmenting k-means with a very simple, randomized seeding technique, we obtain an algorithm that is Θ(log k)-competitive with the optimal clustering. Preliminary experiments show that our augmentation improves both the speed and the accuracy of k-means, often quite dramatically.
%0 Conference Paper
%1 1283494
%A Arthur, David
%A Vassilvitskii, Sergei
%B SODA '07: Proceedings of the eighteenth annual ACM-SIAM symposium on Discrete algorithms
%C Philadelphia, PA, USA
%D 2007
%I Society for Industrial and Applied Mathematics
%K k-means k-means++ clustering
%P 1027--1035
%T k-means++: the advantages of careful seeding
%U http://portal.acm.org/citation.cfm?id=1283494
%X The k-means method is a widely used clustering technique that seeks to minimize the average squared distance between points in the same cluster. Although it offers no accuracy guarantees, its simplicity and speed are very appealing in practice. By augmenting k-means with a very simple, randomized seeding technique, we obtain an algorithm that is Θ(log k)-competitive with the optimal clustering. Preliminary experiments show that our augmentation improves both the speed and the accuracy of k-means, often quite dramatically.
%@ 978-0-898716-24-5
@comment{Conference paper introducing k-means++ (Arthur and Vassilvitskii, SODA 2007).
  The lowercase "k" in the title and the SODA and ACM-SIAM acronyms are brace-protected
  so that case-changing styles leave them alone. The fields added-at, biburl, interhash,
  intrahash and timestamp are BibSonomy export bookkeeping; standard styles ignore
  unknown fields.
  NOTE(review): address looks like the publisher's city and location like the
  conference venue -- confirm before normalising for biblatex, where address is an
  alias of location.}
@inproceedings{1283494,
  author      = {Arthur, David and Vassilvitskii, Sergei},
  title       = {{k-means++}: The Advantages of Careful Seeding},
  booktitle   = {{SODA} '07: Proceedings of the Eighteenth Annual {ACM-SIAM} Symposium on Discrete Algorithms},
  pages       = {1027--1035},
  publisher   = {Society for Industrial and Applied Mathematics},
  address     = {Philadelphia, PA, USA},
  location    = {New Orleans, Louisiana},
  year        = {2007},
  isbn        = {978-0-898716-24-5},
  url         = {http://portal.acm.org/citation.cfm?id=1283494},
  abstract    = {The k-means method is a widely used clustering technique that seeks to minimize the average squared distance between points in the same cluster. Although it offers no accuracy guarantees, its simplicity and speed are very appealing in practice. By augmenting k-means with a very simple, randomized seeding technique, we obtain an algorithm that is $\Theta(\log k)$-competitive with the optimal clustering. Preliminary experiments show that our augmentation improves both the speed and the accuracy of k-means, often quite dramatically.},
  keywords    = {k-means k-means++ clustering},
  description = {k-means++},
  added-at    = {2010-01-24T12:10:08.000+0100},
  timestamp   = {2010-10-15T09:33:21.000+0200},
  biburl      = {https://www.bibsonomy.org/bibtex/2553bbfa74b13c47b4e9c7c0034a8406e/cscholz},
  interhash   = {0be633834158a3c9cba959406c3e1964},
  intrahash   = {553bbfa74b13c47b4e9c7c0034a8406e},
}