In recent years, the management and processing of so-called data streams has become a topic of active research in several fields of computer science such as, e.g., distributed systems, database systems, and data mining. A data stream can roughly be thought of as a transient, continuously increasing sequence of time-stamped data. In this paper, we consider the problem of clustering parallel streams of real-valued data, that is to say, continuously evolving time series. In other words, we are interested in grouping data streams the evolution over time of which is similar in a specific sense. In order to maintain an up-to-date clustering structure, it is necessary to analyze the incoming data in an online manner, tolerating not more than a constant time delay. For this purpose, we develop an efficient online version of the classical K-means clustering algorithm. Our method’s efficiency is mainly due to a scalable online transformation of the original data which allows for a fast computation of approximate distances between streams.
Описание
ScienceDirect.com - Data & Knowledge Engineering - Online clustering of parallel data streams
%0 Journal Article
%1 Beringer2006180
%A Beringer, Jürgen
%A Hüllermeier, Eyke
%D 2006
%J Data & Knowledge Engineering
%K clustering online stream
%N 2
%P 180 - 204
%R 10.1016/j.datak.2005.05.009
%T Online clustering of parallel data streams
%U http://www.sciencedirect.com/science/article/pii/S0169023X05000819
%V 58
%X In recent years, the management and processing of so-called data streams has become a topic of active research in several fields of computer science such as, e.g., distributed systems, database systems, and data mining. A data stream can roughly be thought of as a transient, continuously increasing sequence of time-stamped data. In this paper, we consider the problem of clustering parallel streams of real-valued data, that is to say, continuously evolving time series. In other words, we are interested in grouping data streams the evolution over time of which is similar in a specific sense. In order to maintain an up-to-date clustering structure, it is necessary to analyze the incoming data in an online manner, tolerating not more than a constant time delay. For this purpose, we develop an efficient online version of the classical K-means clustering algorithm. Our method’s efficiency is mainly due to a scalable online transformation of the original data which allows for a fast computation of approximate distances between streams.
% Journal article by Beringer and Huellermeier, Data and Knowledge Engineering 58(2), 2006.
% Exported from BibSonomy (see biburl). The fields added-at, biburl, description,
% interhash, intrahash and timestamp appear to be BibSonomy bookkeeping; they are not
% standard BibTeX fields, so standard bibliography styles ignore them.
% The ampersand in the journal name is escaped because that field is typeset by LaTeX,
% and the page range uses the double hyphen that BibTeX styles expect.
@article{Beringer2006180,
  abstract = {In recent years, the management and processing of so-called data streams has become a topic of active research in several fields of computer science such as, e.g., distributed systems, database systems, and data mining. A data stream can roughly be thought of as a transient, continuously increasing sequence of time-stamped data. In this paper, we consider the problem of clustering parallel streams of real-valued data, that is to say, continuously evolving time series. In other words, we are interested in grouping data streams the evolution over time of which is similar in a specific sense. In order to maintain an up-to-date clustering structure, it is necessary to analyze the incoming data in an online manner, tolerating not more than a constant time delay. For this purpose, we develop an efficient online version of the classical K-means clustering algorithm. Our method’s efficiency is mainly due to a scalable online transformation of the original data which allows for a fast computation of approximate distances between streams.},
  added-at = {2012-06-19T17:46:24.000+0200},
  author = {Beringer, Jürgen and Hüllermeier, Eyke},
  biburl = {https://www.bibsonomy.org/bibtex/2371ee00b48901f64fac820e05059af7c/mboley},
  description = {ScienceDirect.com - Data & Knowledge Engineering - Online clustering of parallel data streams},
  doi = {10.1016/j.datak.2005.05.009},
  interhash = {d5806dbbee8894812608b30fddb41c01},
  intrahash = {371ee00b48901f64fac820e05059af7c},
  issn = {0169-023X},
  journal = {Data \& Knowledge Engineering},
  keywords = {clustering online stream},
  number = 2,
  pages = {180--204},
  timestamp = {2012-06-19T17:46:24.000+0200},
  title = {Online clustering of parallel data streams},
  url = {http://www.sciencedirect.com/science/article/pii/S0169023X05000819},
  volume = 58,
  year = 2006
}