Categorical data clustering constitutes an important part of data mining; its relevance has recently drawn attention from several researchers. As a step in data mining, however, clustering encounters the problem of large amount of data to be processed. This article offers a solution for categorical clustering algorithms when working with high volumes of data by means of a method that summarizes the database. This is done using a structure called CM-tree. In order to test our method, the K-Modes and Click clustering algorithms were used with several databases. Experiments demonstrate that the proposed summarization method improves execution time, without losing clustering quality.
%0 Book Section
%1 Rendon2008
%A Rendón, Eréndira
%A Sánchez, J.
%A Garcia, Rene
%A Abundez, Itzel
%A Gutierrez, Citlalih
%A Gasca, Eduardo
%D 2008
%B Advances in Artificial Intelligence – IBERAMIA 2008
%K clustering db entityguides phd summary
%P 143-152
%R 10.1007/978-3-540-88309-8_15
%T Data Reduction Method for Categorical Data Clustering
%U http://dx.doi.org/10.1007/978-3-540-88309-8_15
%X Categorical data clustering constitutes an important part of data mining; its relevance has recently drawn attention from several researchers. As a step in data mining, however, clustering encounters the problem of large amount of data to be processed. This article offers a solution for categorical clustering algorithms when working with high volumes of data by means of a method that summarizes the database. This is done using a structure called CM-tree. In order to test our method, the K-Modes and Click clustering algorithms were used with several databases. Experiments demonstrate that the proposed summarization method improves execution time, without losing clustering quality.
@incollection{Rendon2008,
abstract = {Categorical data clustering constitutes an important part of data mining; its relevance has recently drawn attention from several researchers. As a step in data mining, however, clustering encounters the problem of large amount of data to be processed. This article offers a solution for categorical clustering algorithms when working with high volumes of data by means of a method that summarizes the database. This is done using a structure called CM-tree. In order to test our method, the K-Modes and Click clustering algorithms were used with several databases. Experiments demonstrate that the proposed summarization method improves execution time, without losing clustering quality.},
added-at = {2009-03-12T15:42:50.000+0100},
author = {Rend\'{o}n, Er\'{e}ndira and S\'{a}nchez, J. and Garcia, Rene and Abundez, Itzel and Gutierrez, Citlalih and Gasca, Eduardo},
biburl = {https://www.bibsonomy.org/bibtex/20854a9e4e39403932e07076e489ec09a/lillejul},
citeulike-article-id = {3987830},
doi = {http://dx.doi.org/10.1007/978-3-540-88309-8\_15},
interhash = {2da84b813f8ae3d0ec312aab4375f417},
intrahash = {0854a9e4e39403932e07076e489ec09a},
journal = {Advances in Artificial Intelligence – IBERAMIA 2008},
keywords = {clustering db entityguides phd summary},
pages = {143--152},
posted-at = {2009-01-31 09:34:44},
priority = {3},
timestamp = {2009-03-12T15:42:50.000+0100},
title = {Data Reduction Method for Categorical Data Clustering},
url = {http://dx.doi.org/10.1007/978-3-540-88309-8\_15},
year = 2008
}