Big data presents new challenges to both cluster infrastructure software
and parallel application design. We present a set of software services
and design principles for data intensive computing with petabyte
data sets, named GrayWulf. These services are intended for deployment
on a cluster of commodity servers similar to the well-known Beowulf
clusters. We use the Pan-STARRS system currently under development
as an example of the architecture and principles in action.
%0 Report
%1 Simmhan:msrtr:2008
%A Simmhan, Yogesh
%A Barga, Roger
%A van Ingen, Catharine
%A Nieto-Santisteban, Maria
%A Dobos, Laszlo
%A Li, Nolan
%A Shipway, Michael
%A Szalay, Alexander S.
%A Werner, Sue
%A Heasley, Jim
%D 2008
%K cloud, data escience, graywulf, hpc, management, msr, panstarrs trident, workflows
%N MSR-TR-2008-186
%T GrayWulf: Scalable Software Architecture for Data Intensive Computing
%U http://research.microsoft.com/apps/pubs/default.aspx?id=79430
%X Big data presents new challenges to both cluster infrastructure software
and parallel application design. We present a set of software services
and design principles for data intensive computing with petabyte
data sets, named GrayWulf. These services are intended for deployment
on a cluster of commodity servers similar to the well-known Beowulf
clusters. We use the Pan-STARRS system currently under development
as an example of the architecture and principles in action.
@techreport{Simmhan:msrtr:2008,
  abstract    = {Big data presents new challenges to both cluster infrastructure software
and parallel application design. We present a set of software services
and design principles for data intensive computing with petabyte
data sets, named GrayWulf. These services are intended for deployment
on a cluster of commodity servers similar to the well-known Beowulf
clusters. We use the Pan-STARRS system currently under development
as an example of the architecture and principles in action.},
  added-at    = {2014-08-13T04:08:36.000+0200},
  author      = {Simmhan, Yogesh and Barga, Roger and van Ingen, Catharine and Nieto-Santisteban, Maria and Dobos, Laszlo and Li, Nolan and Shipway, Michael and Szalay, Alexander S. and Werner, Sue and Heasley, Jim},
  biburl      = {https://www.bibsonomy.org/bibtex/27b20e2550482bf2f68dc2130306139e4/simmhan},
  institution = {Microsoft Research},
  interhash   = {302e8122c59cf8e20a039f7b3fb77085},
  intrahash   = {7b20e2550482bf2f68dc2130306139e4},
  keywords    = {cloud, data escience, graywulf, hpc, management, msr, panstarrs trident, workflows},
  month       = sep,
  note        = {Extended version of HICSS 2009},
  number      = {MSR-TR-2008-186},
  owner       = {Simmhan},
  timestamp   = {2014-08-13T04:08:36.000+0200},
  title       = {{GrayWulf}: Scalable Software Architecture for Data Intensive Computing},
  url         = {http://research.microsoft.com/apps/pubs/default.aspx?id=79430},
  year        = {2008},
}