Provenance is increasingly recognized as being critical to the understanding
and reuse of scientific datasets. Given the rapid generation of scientific
data from sensors and computational model results, it is not practical
to manually record provenance for data and automated techniques for
provenance capture are essential. Scientific workflows provide a
framework for representing computational models and complex transformations
of scientific data, and present a means for tracking the operations
performed to derive a dataset. The Trident Scientific Workbench is
a workflow system that natively incorporates provenance capture of
data derived as part of the workflow execution. The applications
used as part of a Trident workflow can execute on a remote computational
cluster, such as a supercomputing center or in the Cloud, or on the
local desktop of the researcher and provenance on data derived by
the applications is seamlessly captured. Scientists also have the
option to annotate the provenance metadata using domain specific
tags, such as, for example, GCMD keywords. The provenance records
thus captured can be exported in the Open Provenance Model XML standard
that is emerging or visualized as a graph. The Trident system and
provenance recorded by it has been successfully applied in the Neptune
oceanography project and is presently being tested in the Pan-STARRS
astronomy project.
%0 Conference Paper
%1 Simmhan:agu:2008
%A Simmhan, Yogesh
%A Barga, Roger
%A van Ingen, Catharine
%B American Geophysical Union (AGU) Fall Meeting
%D 2008
%I AGU
%K escience, msr, poster, provenance, trident
%T Automatic Provenance Recording for Scientific Data using Trident
%U http://adsabs.harvard.edu/abs/2008AGUFMIN11C1048S
%X Provenance is increasingly recognized as being critical to the understanding
and reuse of scientific datasets. Given the rapid generation of scientific
data from sensors and computational model results, it is not practical
to manually record provenance for data and automated techniques for
provenance capture are essential. Scientific workflows provide a
framework for representing computational models and complex transformations
of scientific data, and present a means for tracking the operations
performed to derive a dataset. The Trident Scientific Workbench is
a workflow system that natively incorporates provenance capture of
data derived as part of the workflow execution. The applications
used as part of a Trident workflow can execute on a remote computational
cluster, such as a supercomputing center or in the Cloud, or on the
local desktop of the researcher and provenance on data derived by
the applications is seamlessly captured. Scientists also have the
option to annotate the provenance metadata using domain specific
tags, such as, for example, GCMD keywords. The provenance records
thus captured can be exported in the Open Provenance Model XML standard
that is emerging or visualized as a graph. The Trident system and
provenance recorded by it has been successfully applied in the Neptune
oceanography project and is presently being tested in the Pan-STARRS
astronomy project.
% Poster entry for the AGU Fall Meeting 2008; the url field points to the ADS abstract record.
@inproceedings{Simmhan:agu:2008,
  abstract  = {Provenance is increasingly recognized as being critical to the understanding
and reuse of scientific datasets. Given the rapid generation of scientific
data from sensors and computational model results, it is not practical
to manually record provenance for data and automated techniques for
provenance capture are essential. Scientific workflows provide a
framework for representing computational models and complex transformations
of scientific data, and present a means for tracking the operations
performed to derive a dataset. The Trident Scientific Workbench is
a workflow system that natively incorporates provenance capture of
data derived as part of the workflow execution. The applications
used as part of a Trident workflow can execute on a remote computational
cluster, such as a supercomputing center or in the Cloud, or on the
local desktop of the researcher and provenance on data derived by
the applications is seamlessly captured. Scientists also have the
option to annotate the provenance metadata using domain specific
tags, such as, for example, GCMD keywords. The provenance records
thus captured can be exported in the Open Provenance Model XML standard
that is emerging or visualized as a graph. The Trident system and
provenance recorded by it has been successfully applied in the Neptune
oceanography project and is presently being tested in the Pan-STARRS
astronomy project.},
  added-at  = {2014-08-13T04:08:36.000+0200},
  author    = {Simmhan, Yogesh and Barga, Roger and van Ingen, Catharine},
  biburl    = {https://www.bibsonomy.org/bibtex/209f4341a43c32eb2edcc03edc471512f/simmhan},
  booktitle = {American Geophysical Union ({AGU}) Fall Meeting},
  interhash = {0e3c0a44f05947434f658d5f9c207431},
  intrahash = {09f4341a43c32eb2edcc03edc471512f},
  keywords  = {escience, msr, poster, provenance, trident},
  note      = {Poster},
  owner     = {Simmhan},
  publisher = {AGU},
  timestamp = {2014-08-13T04:08:36.000+0200},
  title     = {Automatic Provenance Recording for Scientific Data using {Trident}},
  url       = {http://adsabs.harvard.edu/abs/2008AGUFMIN11C1048S},
  year      = {2008},
}