The discovery and analysis of community structure in networks is a topic of
considerable recent interest within the physics community, but most methods
proposed so far are unsuitable for very large networks because of their
computational cost. Here we present a hierarchical agglomeration algorithm for
detecting community structure which is faster than many competing algorithms:
its running time on a network with n vertices and m edges is O(m d log n) where
d is the depth of the dendrogram describing the community structure. Many
real-world networks are sparse and hierarchical, with m ~ n and d ~ log n, in
which case our algorithm runs in essentially linear time, O(n log^2 n). As an
example of the application of this algorithm we use it to analyze a network of
items for sale on the web-site of a large online retailer, items in the network
being linked if they are frequently purchased by the same buyer. The network
has more than 400,000 vertices and 2 million edges. We show that our algorithm
can extract meaningful communities from this network, revealing large-scale
patterns present in the purchasing habits of customers.
%0 Generic
%1 citeulike:95936
%A Clauset, Aaron
%A Newman, M. E. J.
%A Moore, Cristopher
%D 2004
%K clustering large network community
%T Finding community structure in very large networks
%U http://arxiv.org/abs/cond-mat/0408187
%X The discovery and analysis of community structure in networks is a topic of
considerable recent interest within the physics community, but most methods
proposed so far are unsuitable for very large networks because of their
computational cost. Here we present a hierarchical agglomeration algorithm for
detecting community structure which is faster than many competing algorithms:
its running time on a network with n vertices and m edges is O(m d log n) where
d is the depth of the dendrogram describing the community structure. Many
real-world networks are sparse and hierarchical, with m ~ n and d ~ log n, in
which case our algorithm runs in essentially linear time, O(n log^2 n). As an
example of the application of this algorithm we use it to analyze a network of
items for sale on the web-site of a large online retailer, items in the network
being linked if they are frequently purchased by the same buyer. The network
has more than 400,000 vertices and 2 million edges. We show that our algorithm
can extract meaningful communities from this network, revealing large-scale
patterns present in the purchasing habits of customers.
@misc{citeulike:95936,
abstract = {The discovery and analysis of community structure in networks is a topic of
considerable recent interest within the physics community, but most methods
proposed so far are unsuitable for very large networks because of their
computational cost. Here we present a hierarchical agglomeration algorithm for
detecting community structure which is faster than many competing algorithms:
its running time on a network with n vertices and m edges is O(m d log n) where
d is the depth of the dendrogram describing the community structure. Many
real-world networks are sparse and hierarchical, with m ~ n and d ~ log n, in
which case our algorithm runs in essentially linear time, O(n log^2 n). As an
example of the application of this algorithm we use it to analyze a network of
items for sale on the web-site of a large online retailer, items in the network
being linked if they are frequently purchased by the same buyer. The network
has more than 400,000 vertices and 2 million edges. We show that our algorithm
can extract meaningful communities from this network, revealing large-scale
patterns present in the purchasing habits of customers.},
added-at = {2006-02-15T08:00:29.000+0100},
author = {Clauset, Aaron and Newman, M. E. J. and Moore, Cristopher},
biburl = {https://www.bibsonomy.org/bibtex/2f9a12630a6d31d576ea5222219a4cf0b/hotho},
citeulike-article-id = {95936},
eprint = {cond-mat/0408187},
interhash = {2c68e3c981a00380692a3b0b661d7cfd},
intrahash = {f9a12630a6d31d576ea5222219a4cf0b},
keywords = {clustering large network community},
month = {August},
priority = {0},
timestamp = {2006-02-15T08:00:29.000+0100},
title = {Finding community structure in very large networks},
url = {http://arxiv.org/abs/cond-mat/0408187},
year = 2004
}