The growing recognition of the importance of provenance for data intensive and multidisciplinary domains is leading to careful collection of provenance. One consequence of this is the proliferation of provenance repositories hosted for individual organization or communities, with limited ability to reconstruct and query for and on provenance across them. Community standards like the Open Provenance Model (OPM) allow uniform interpretation and exchange of provenance metadata but do not prescribe query or service specifications to access provenance. If data reuse and sharing across institutions is not accompanied by passing provenance at the time of data exchange, we need to track the provenance and query for them or over them across distributed provenance repositories. In this article, we present approaches for querying over distributed provenance information, and address two common provenance query models that we formalize: provenance retrieval query and provenance filter query. Our problem is motivated by Smart Oilfield applications in the energy informatics domain, and we evaluate the performance of our algorithms using synthetic workflows based on the domain.
%0 Journal Article
%1 Zhao:ijca:2011
%A Zhao, Jing
%A Simmhan, Yogesh
%A Gomadam, Karthik
%A Prasanna, Viktor K.
%D 2011
%I ISCA
%J International Journal of Computers and Their Applications (IJCA)
%K issue oilfield, peer provenance, reviewed, smart special usc
%N 3
%P 196-215
%T Querying Provenance Information in Distributed Environments
%U http://ceng.usc.edu/~simmhan/pubs/zhao-ijca-2011.pdf
%V 18
%X The growing recognition of the importance of provenance for data intensive and multidisciplinary domains is leading to careful collection of provenance. One consequence of this is the proliferation of provenance repositories hosted for individual organization or communities, with limited ability to reconstruct and query for and on provenance across them. Community standards like the Open Provenance Model (OPM) allow uniform interpretation and exchange of provenance metadata but do not prescribe query or service specifications to access provenance. If data reuse and sharing across institutions is not accompanied by passing provenance at the time of data exchange, we need to track the provenance and query for them or over them across distributed provenance repositories. In this article, we present approaches for querying over distributed provenance information, and address two common provenance query models that we formalize: provenance retrieval query and provenance filter query. Our problem is motivated by Smart Oilfield applications in the energy informatics domain, and we evaluate the performance of our algorithms using synthetic workflows based on the domain.
@article{Zhao:ijca:2011,
  abstract  = {The growing recognition of the importance of provenance for data intensive and multidisciplinary domains is leading to careful collection of provenance. One consequence of this is the proliferation of provenance repositories hosted for individual organization or communities, with limited ability to reconstruct and query for and on provenance across them. Community standards like the Open Provenance Model (OPM) allow uniform interpretation and exchange of provenance metadata but do not prescribe query or service specifications to access provenance. If data reuse and sharing across institutions is not accompanied by passing provenance at the time of data exchange, we need to track the provenance and query for them or over them across distributed provenance repositories. In this article, we present approaches for querying over distributed provenance information, and address two common provenance query models that we formalize: provenance retrieval query and provenance filter query. Our problem is motivated by Smart Oilfield applications in the energy informatics domain, and we evaluate the performance of our algorithms using synthetic workflows based on the domain.},
  added-at  = {2023-04-07T07:37:58.000+0200},
  author    = {Zhao, Jing and Simmhan, Yogesh and Gomadam, Karthik and Prasanna, Viktor K.},
  biburl    = {https://www.bibsonomy.org/bibtex/267ef1dc2893f2e6d5be7258f8f800eb9/vinayaka2000},
  interhash = {42d92e12f65c9a40599ec777dd060e2e},
  intrahash = {67ef1dc2893f2e6d5be7258f8f800eb9},
  issn      = {1076-5204},
  journal   = {International Journal of Computers and Their Applications (IJCA)},
  keywords  = {issue oilfield, peer provenance, reviewed, smart special usc},
  month     = sep,
  number    = {3},
  owner     = {Simmhan},
  pages     = {196--215},
  publisher = {ISCA},
  timestamp = {2023-04-07T07:37:58.000+0200},
  title     = {Querying Provenance Information in Distributed Environments},
  url       = {http://ceng.usc.edu/~simmhan/pubs/zhao-ijca-2011.pdf},
  volume    = {18},
  year      = {2011},
}