The discovery and analysis of community structure in networks is a topic of considerable recent interest within the physics community, but most methods proposed so far are unsuitable for very large networks because of their computational cost. Here we present a hierarchical agglomeration algorithm for detecting community structure which is faster than many competing algorithms: its running time on a network with n vertices and m edges is O(m d log n) where d is the depth of the dendrogram describing the community structure. Many real-world networks are sparse and hierarchical, with m ~ n and d ~ log n, in which case our algorithm runs in essentially linear time, O(n log^2 n). As an example of the application of this algorithm we use it to analyze a network of items for sale on the web-site of a large online retailer, items in the network being linked if they are frequently purchased by the same buyer. The network has more than 400,000 vertices and 2 million edges. We show that our algorithm can extract meaningful communities from this network, revealing large-scale patterns present in the purchasing habits of customers.
%0 Journal Article
%1 Clauset:2004p5519
%A Clauset, Aaron
%A Newman, Mark E. J
%A Moore, Cristopher
%D 2004
%J arXiv
%K cond-mat.dis-nn cond-mat.stat-mech,
%T Finding community structure in very large networks
%U http://arxiv.org/abs/cond-mat/0408187v2
%V cond-mat.stat-mech
%X The discovery and analysis of community structure in networks is a topic of considerable recent interest within the physics community, but most methods proposed so far are unsuitable for very large networks because of their computational cost. Here we present a hierarchical agglomeration algorithm for detecting community structure which is faster than many competing algorithms: its running time on a network with n vertices and m edges is O(m d log n) where d is the depth of the dendrogram describing the community structure. Many real-world networks are sparse and hierarchical, with m ~ n and d ~ log n, in which case our algorithm runs in essentially linear time, O(n log^2 n). As an example of the application of this algorithm we use it to analyze a network of items for sale on the web-site of a large online retailer, items in the network being linked if they are frequently purchased by the same buyer. The network has more than 400,000 vertices and 2 million edges. We show that our algorithm can extract meaningful communities from this network, revealing large-scale patterns present in the purchasing habits of customers.
@article{Clauset:2004p5519,
abstract = { The discovery and analysis of community structure in networks is a topic of considerable recent interest within the physics community, but most methods proposed so far are unsuitable for very large networks because of their computational cost. Here we present a hierarchical agglomeration algorithm for detecting community structure which is faster than many competing algorithms: its running time on a network with n vertices and m edges is O(m d log n) where d is the depth of the dendrogram describing the community structure. Many real-world networks are sparse and hierarchical, with m ~ n and d ~ log n, in which case our algorithm runs in essentially linear time, O(n log^2 n). As an example of the application of this algorithm we use it to analyze a network of items for sale on the web-site of a large online retailer, items in the network being linked if they are frequently purchased by the same buyer. The network has more than 400,000 vertices and 2 million edges. We show that our algorithm can extract meaningful communities from this network, revealing large-scale patterns present in the purchasing habits of customers. },
added-at = {2008-03-13T16:33:57.000+0100},
author = {Clauset, Aaron and Newman, Mark E. J and Moore, Cristopher},
biburl = {https://www.bibsonomy.org/bibtex/2023ab5f6ceb9582580aaf6c8506a9d31/bertil.hatt},
date-added = {2008-03-13 10:48:52 +0100},
date-modified = {2008-03-13 14:41:52 +0100},
description = {March 2008},
interhash = {2c68e3c981a00380692a3b0b661d7cfd},
intrahash = {023ab5f6ceb9582580aaf6c8506a9d31},
journal = {arXiv},
keywords = {cond-mat.dis-nn cond-mat.stat-mech,},
month = Aug,
note = {Published in: Phys. Rev. E 70, 066111 (2004)},
pmid = {cond-mat/0408187v2},
rating = {0},
timestamp = {2008-03-13T16:34:09.000+0100},
title = {Finding community structure in very large networks},
uri = {papers://C3B117CD-23C4-4854-9426-AC96AFB113DA/Paper/p5519},
url = {http://arxiv.org/abs/cond-mat/0408187v2},
volume = {cond-mat.stat-mech},
year = 2004
}