J. Kubica, A. Moore, and J. Schneider. ICDM '03: Proceedings of the Third IEEE International Conference on Data Mining, pages 573--576. Washington, DC, USA, IEEE Computer Society (November 2003)
Abstract
Discovering underlying structure from co-occurrence data is an important task in a variety of fields, including: insurance, intelligence, criminal investigation, epidemiology, human resources, and marketing. Previously Kubica et al. presented the group detection algorithm (GDA) - an algorithm for finding underlying groupings of entities from co-occurrence data. This algorithm is based on a probabilistic generative model and produces coherent groups that are consistent with prior knowledge. Unfortunately, the optimization used in GDA is slow, potentially making it infeasible for many large data sets. To this end, we present k-groups - an algorithm that uses an approach similar to that of k-means to significantly accelerate the discovery of groups while retaining GDA's probabilistic model. We compare the performance of GDA and k-groups on a variety of data, showing that k-groups' sacrifice in solution quality is significantly offset by its increase in speed.
%0 Conference Paper
%1 citeulike:580809
%A Kubica, Jeremy
%A Moore, Andrew
%A Schneider, Jeff
%B ICDM '03: Proceedings of the Third IEEE International Conference on Data Mining
%C Washington, DC, USA
%D 2003
%E Wu, Xindong
%E Tuzhilin, Alex
%E Shavlik, Jude
%I IEEE Computer Society
%K topicinference community
%P 573--576
%T Tractable Group Detection on Large Link Data Sets
%U http://portal.acm.org/citation.cfm?id=951949.952107
%X Discovering underlying structure from co-occurrence data is an important task in a variety of fields, including: insurance, intelligence, criminal investigation, epidemiology, human resources, and marketing. Previously Kubica et al. presented the group detection algorithm (GDA) - an algorithm for finding underlying groupings of entities from co-occurrence data. This algorithm is based on a probabilistic generative model and produces coherent groups that are consistent with prior knowledge. Unfortunately, the optimization used in GDA is slow, potentially making it infeasible for many large data sets. To this end, we present k-groups - an algorithm that uses an approach similar to that of k-means to significantly accelerate the discovery of groups while retaining GDA's probabilistic model. We compare the performance of GDA and k-groups on a variety of data, showing that k-groups' sacrifice in solution quality is significantly offset by its increase in speed.
%@ 0769519784
@comment{Conference paper in the ICDM '03 proceedings. The fields added-at, biburl, citeulike-article-id, interhash, intrahash, priority and timestamp are export metadata that standard styles ignore.}
@inproceedings{citeulike:580809,
  abstract             = {Discovering underlying structure from co-occurrence data is an important task in a variety of fields, including: insurance, intelligence, criminal investigation, epidemiology, human resources, and marketing. Previously Kubica et al. presented the group detection algorithm (GDA) - an algorithm for finding underlying groupings of entities from co-occurrence data. This algorithm is based on a probabilistic generative model and produces coherent groups that are consistent with prior knowledge. Unfortunately, the optimization used in GDA is slow, potentially making it infeasible for many large data sets. To this end, we present k-groups - an algorithm that uses an approach similar to that of k-means to significantly accelerate the discovery of groups while retaining GDA's probabilistic model. We compare the performance of GDA and k-groups on a variety of data, showing that k-groups' sacrifice in solution quality is significantly offset by its increase in speed.},
  added-at             = {2006-09-25T12:54:00.000+0200},
  address              = {Washington, DC, USA},
  author               = {Kubica, Jeremy and Moore, Andrew and Schneider, Jeff},
  biburl               = {https://www.bibsonomy.org/bibtex/2602bdfa6c57fd20ffba4366cef6721be/grahl},
  booktitle            = {{ICDM} '03: Proceedings of the Third {IEEE} International Conference on Data Mining},
  citeulike-article-id = {580809},
  editor               = {Wu, Xindong and Tuzhilin, Alex and Shavlik, Jude},
  interhash            = {0d7be00e85fa41a082bab454c0665126},
  intrahash            = {602bdfa6c57fd20ffba4366cef6721be},
  isbn                 = {0769519784},
  keywords             = {topicinference community},
  month                = nov,
  pages                = {573--576},
  priority             = {0},
  publisher            = {IEEE Computer Society},
  timestamp            = {2006-09-25T12:54:00.000+0200},
  title                = {Tractable Group Detection on Large Link Data Sets},
  url                  = {http://portal.acm.org/citation.cfm?id=951949.952107},
  year                 = {2003},
}