We are interested in finding natural communities in large-scale linked networks. Our ultimate goal is to track changes over time in such communities. For such temporal tracking, we require a clustering algorithm that is relatively stable under small perturbations of the input data. We have developed an efficient, scalable agglomerative strategy and applied it to the citation graph of the NEC CiteSeer database (250,000 papers; 4.5 million citations). Agglomerative clustering techniques are known to be unstable on data in which the community structure is not strong. We find that some communities are essentially random and thus unstable while others are natural and will appear in most clusterings. These natural communities will enable us to track the evolution of communities over time.
%0 Conference Paper
%1 Hopcroft03naturalCommunities
%A Hopcroft, John
%A Khan, Omar
%A Kulis, Brian
%A Selman, Bart
%B KDD '03: Proceedings of the ninth ACM SIGKDD international conference on Knowledge discovery and data mining
%C New York, NY, USA
%D 2003
%I ACM
%K 03 Hopcroft community discovery toread
%P 541--546
%R 10.1145/956750.956816
%T Natural communities in large linked networks
%U http://portal.acm.org/citation.cfm?id=956816&dl=GUIDE&coll=GUIDE&CFID=64167610&CFTOKEN=59359426
%X We are interested in finding natural communities in large-scale linked networks. Our ultimate goal is to track changes over time in such communities. For such temporal tracking, we require a clustering algorithm that is relatively stable under small perturbations of the input data. We have developed an efficient, scalable agglomerative strategy and applied it to the citation graph of the NEC CiteSeer database (250,000 papers; 4.5 million citations). Agglomerative clustering techniques are known to be unstable on data in which the community structure is not strong. We find that some communities are essentially random and thus unstable while others are natural and will appear in most clusterings. These natural communities will enable us to track the evolution of communities over time.
%@ 1-58113-737-0
@comment{
  Conference paper from the KDD '03 proceedings (BibSonomy export, cleaned up).
  - doi holds the bare DOI with no resolver prefix; styles add the resolver.
  - address is the publisher's city; venue is where the conference was held.
    The venue is kept out of the location field because biblatex treats
    address as a legacy alias of location, so the two would collide.
  - added-at, biburl, interhash, intrahash and timestamp are BibSonomy
    bookkeeping fields; standard styles ignore them.
}
@inproceedings{Hopcroft03naturalCommunities,
  abstract    = {We are interested in finding natural communities in large-scale linked networks. Our ultimate goal is to track changes over time in such communities. For such temporal tracking, we require a clustering algorithm that is relatively stable under small perturbations of the input data. We have developed an efficient, scalable agglomerative strategy and applied it to the citation graph of the NEC CiteSeer database (250,000 papers; 4.5 million citations). Agglomerative clustering techniques are known to be unstable on data in which the community structure is not strong. We find that some communities are essentially random and thus unstable while others are natural and will appear in most clusterings. These natural communities will enable us to track the evolution of communities over time.},
  added-at    = {2008-11-12T20:24:42.000+0100},
  address     = {New York, NY, USA},
  author      = {Hopcroft, John and Khan, Omar and Kulis, Brian and Selman, Bart},
  biburl      = {https://www.bibsonomy.org/bibtex/2460e2b381d278bd2dae163aea04563a0/lee_peck},
  booktitle   = {{KDD} '03: Proceedings of the ninth {ACM} {SIGKDD} international conference on Knowledge discovery and data mining},
  description = {Natural communities in large linked networks},
  doi         = {10.1145/956750.956816},
  interhash   = {79adb3b5af24e6d151845715bd11d519},
  intrahash   = {460e2b381d278bd2dae163aea04563a0},
  isbn        = {1-58113-737-0},
  keywords    = {03 Hopcroft community discovery toread},
  pages       = {541--546},
  publisher   = {ACM},
  timestamp   = {2009-02-02T16:47:28.000+0100},
  title       = {Natural communities in large linked networks},
  url         = {http://portal.acm.org/citation.cfm?id=956816&dl=GUIDE&coll=GUIDE&CFID=64167610&CFTOKEN=59359426},
  venue       = {Washington, D.C.},
  year        = {2003},
}