The scientific computing community, especially academia is clearly
in need of technology to handle and organize the 1-100+ Terabyte
datasets coming from computer simulations and scientific instrumentation.
In this paper we briefly describe GrayWulf, an exemplar cluster for
data intensive applications using SQL Server and HPC Clusters. One
of the key software components of GrayWulf is Trident, a scientific
workflow workbench that performs automatic scheduling of workflows
across the cluster. We examine the challenges of scheduling workflows
on GrayWulf, algorithms to improve performance, and present early
results from applying Trident to schedule data loading workflows
on GrayWulf for an actual e-Science project.
%0 Conference Paper
%1 Barga:clade:2008
%A Barga, Roger S.
%A Fay, Dan
%A Guo, Dean
%A Newhouse, Steven
%A Simmhan, Yogesh
%A Szalay, Alex
%B International Workshop on Challenges of Large Applications in Distributed
Environments (CLADE)
%D 2008
%I ACM
%K data eScience, hpc, intensive, msr, peer reviewed scheduling, workflow
%P 63-68
%R 10.1145/1383529.1383545
%T Efficient scheduling of scientific workflows in a high performance
computing cluster
%X The scientific computing community, especially academia is clearly
in need of technology to handle and organize the 1-100+ Terabyte
datasets coming from computer simulations and scientific instrumentation.
In this paper we briefly describe GrayWulf, an exemplar cluster for
data intensive applications using SQL Server and HPC Clusters. One
of the key software components of GrayWulf is Trident, a scientific
workflow workbench that performs automatic scheduling of workflows
across the cluster. We examine the challenges of scheduling workflows
on GrayWulf, algorithms to improve performance, and present early
results from applying Trident to schedule data loading workflows
on GrayWulf for an actual e-Science project.
%@ 978-1-60558-156-9
@comment{Conference paper (CLADE 2008, ACM). Fixed: pages range uses "--",
  trailing comma removed from keywords, year braced for consistency,
  fields aligned and wrapped continuations indented.}
@inproceedings{Barga:clade:2008,
  abstract  = {The scientific computing community, especially academia is clearly
               in need of technology to handle and organize the 1-100+ Terabyte
               datasets coming from computer simulations and scientific instrumentation.
               In this paper we briefly describe GrayWulf, an exemplar cluster for
               data intensive applications using SQL Server and HPC Clusters. One
               of the key software components of GrayWulf is Trident, a scientific
               workflow workbench that performs automatic scheduling of workflows
               across the cluster. We examine the challenges of scheduling workflows
               on GrayWulf, algorithms to improve performance, and present early
               results from applying Trident to schedule data loading workflows
               on GrayWulf for an actual e-Science project},
  acmid     = {1383545},
  added-at  = {2014-08-13T04:08:36.000+0200},
  author    = {Barga, Roger S. and Fay, Dan and Guo, Dean and Newhouse, Steven and Simmhan, Yogesh and Szalay, Alex},
  biburl    = {https://www.bibsonomy.org/bibtex/23a550432829829f98b4ba2f3f436637e/simmhan},
  booktitle = {International Workshop on Challenges of Large Applications in Distributed
               Environments (CLADE)},
  doi       = {10.1145/1383529.1383545},
  interhash = {9b471a91584185648adde6c7cca1d17d},
  intrahash = {3a550432829829f98b4ba2f3f436637e},
  isbn      = {978-1-60558-156-9},
  keywords  = {data eScience, hpc, intensive, msr, peer reviewed scheduling, workflow},
  location  = {Boston, MA, USA},
  note      = {[CORE C]},
  numpages  = {6},
  owner     = {Simmhan},
  pages     = {63--68},
  publisher = {ACM},
  timestamp = {2014-08-13T04:08:36.000+0200},
  title     = {Efficient scheduling of scientific workflows in a high performance
               computing cluster},
  year      = {2008},
}