The widely discussed scientific data deluge creates not only a need
to computationally scale an application from a local desktop or cluster
to a supercomputer, but also the need to cope with variable data
loads over time. Cloud computing offers a scalable, economic, on-demand
model well matched to the evolving eScience needs. Yet cloud computing
creates gaps that must be crossed to move science applications to
the cloud. In this article, we propose a Generic Worker framework
to deploy and invoke science applications in the Cloud with minimal
user effort and predictable, cost-effective performance. Our framework
is an evolution of Grid computing application factory pattern and
addresses the distinct challenges posed by the Cloud such as efficient
data transfers to and from the Cloud, and the transient nature of
its VMs. We present an implementation of the Generic Worker for the
Microsoft Azure Cloud and evaluate its use in a genome sequencing
application pipeline. Our results show that the user overhead to
port and run the application seamlessly across desktop and the Cloud
can be substantially reduced without significant performance penalties,
while providing on-demand scalability.
%0 Report
%1 Simmhan:msrtr:2009a
%A Simmhan, Yogesh
%A van Ingen, Catharine
%A Subramanian, Girish
%A Li, Jie
%D 2009
%K cloud, escience, generic genomics msr, worker, workflow
%N MSR-TR-2009-2021
%T Bridging the Gap between the Cloud and an eScience Application Platform
%U http://research.microsoft.com/apps/pubs/default.aspx?id=118329
%X The widely discussed scientific data deluge creates not only a need
to computationally scale an application from a local desktop or cluster
to a supercomputer, but also the need to cope with variable data
loads over time. Cloud computing offers a scalable, economic, on-demand
model well matched to the evolving eScience needs. Yet cloud computing
creates gaps that must be crossed to move science applications to
the cloud. In this article, we propose a Generic Worker framework
to deploy and invoke science applications in the Cloud with minimal
user effort and predictable, cost-effective performance. Our framework
is an evolution of Grid computing application factory pattern and
addresses the distinct challenges posed by the Cloud such as efficient
data transfers to and from the Cloud, and the transient nature of
its VMs. We present an implementation of the Generic Worker for the
Microsoft Azure Cloud and evaluate its use in a genome sequencing
application pipeline. Our results show that the user overhead to
port and run the application seamlessly across desktop and the Cloud
can be substantially reduced without significant performance penalties,
while providing on-demand scalability.
@techreport{Simmhan:msrtr:2009a,
abstract = {The widely discussed scientific data deluge creates not only a need
to computationally scale an application from a local desktop or cluster
to a supercomputer, but also the need to cope with variable data
loads over time. Cloud computing offers a scalable, economic, on-demand
model well matched to the evolving eScience needs. Yet cloud computing
creates gaps that must be crossed to move science applications to
the cloud. In this article, we propose a Generic Worker framework
to deploy and invoke science applications in the Cloud with minimal
user effort and predictable, cost-effective performance. Our framework
is an evolution of Grid computing application factory pattern and
addresses the distinct challenges posed by the Cloud such as efficient
data transfers to and from the Cloud, and the transient nature of
its VMs. We present an implementation of the Generic Worker for the
Microsoft Azure Cloud and evaluate its use in a genome sequencing
application pipeline. Our results show that the user overhead to
port and run the application seamlessly across desktop and the Cloud
can be substantially reduced without significant performance penalties,
while providing on-demand scalability.},
added-at = {2014-08-13T04:08:36.000+0200},
author = {Simmhan, Yogesh and van Ingen, Catharine and Subramanian, Girish and Li, Jie},
biburl = {https://www.bibsonomy.org/bibtex/2cb10dda1d0994cb87b8ca1e816e62422/simmhan},
institution = {Microsoft Research},
interhash = {a0532bb4ca9aaf8052495f920768d34c},
intrahash = {cb10dda1d0994cb87b8ca1e816e62422},
keywords = {cloud, escience, generic genomics msr, worker, workflow},
month = nov,
note = {Extended version of IEEE Cloud 2010},
number = {MSR-TR-2009-2021},
owner = {Simmhan},
timestamp = {2014-08-13T04:08:36.000+0200},
title = {Bridging the Gap between the Cloud and an {eScience} Application Platform},
url = {http://research.microsoft.com/apps/pubs/default.aspx?id=118329},
year = 2009
}