Query processing in data integration occurs over network-bound, autonomous data sources. This requires extensions to traditional optimization and execution techniques for three reasons: there is an absence of quality statistics about the data, data transfer rates are unpredictable and bursty, and slow or unavailable data sources can often be replaced by overlapping or mirrored sources. This paper presents the Tukwila data integration system, designed to support adaptivity at its core using a two-pronged approach. Interleaved planning and execution with partial optimization allows Tukwila to quickly recover from decisions based on inaccurate estimates. During execution, Tukwila uses adaptive query operators such as the double pipelined hash join, which produces answers quickly, and the dynamic collector, which robustly and efficiently computes unions across overlapping data sources. We demonstrate that the Tukwila architecture extends previous innovations in adaptive execution (such as query scrambling, mid-execution re-optimization, and choose nodes), and we present experimental evidence that our techniques result in behavior desirable for a data integration system.
%0 Conference Paper
%1 ives_adaptive_1999
%A Ives, Zachary G.
%A Florescu, Daniela
%A Friedman, Marc
%A Levy, Alon
%A Weld, Daniel S.
%B Proceedings of the 1999 ACM SIGMOD International Conference on Management of Data
%C Philadelphia, Pennsylvania, United States
%D 1999
%I ACM
%K imported
%P 299--310
%R 10.1145/304182.304209
%T An adaptive query execution system for data integration
%U http://portal.acm.org/citation.cfm?id=304182.304209&type=series
%X Query processing in data integration occurs over network-bound, autonomous data sources. This requires extensions to traditional optimization and execution techniques for three reasons: there is an absence of quality statistics about the data, data transfer rates are unpredictable and bursty, and slow or unavailable data sources can often be replaced by overlapping or mirrored sources. This paper presents the Tukwila data integration system, designed to support adaptivity at its core using a two-pronged approach. Interleaved planning and execution with partial optimization allows Tukwila to quickly recover from decisions based on inaccurate estimates. During execution, Tukwila uses adaptive query operators such as the double pipelined hash join, which produces answers quickly, and the dynamic collector, which robustly and efficiently computes unions across overlapping data sources. We demonstrate that the Tukwila architecture extends previous innovations in adaptive execution (such as query scrambling, mid-execution re-optimization, and choose nodes), and we present experimental evidence that our techniques result in behavior desirable for a data integration system.
%@ 1-58113-084-8
@comment{
  Conference paper presenting the Tukwila data integration system
  (ACM SIGMOD 1999 proceedings).
  - address is the publisher's city; the conference location is kept in venue.
  - url is the DOI resolver link built from the doi field, so it needs no
    escaped characters.
  - The fields from added-at onward are bookmarking-service metadata and are
    ignored by bibliography styles.
}
@inproceedings{ives_adaptive_1999,
  author    = {Ives, Zachary G. and Florescu, Daniela and Friedman, Marc and Levy, Alon and Weld, Daniel S.},
  title     = {An Adaptive Query Execution System for Data Integration},
  booktitle = {Proceedings of the 1999 {ACM} {SIGMOD} International Conference on Management of Data},
  venue     = {Philadelphia, Pennsylvania, United States},
  pages     = {299--310},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {1999},
  isbn      = {1-58113-084-8},
  doi       = {10.1145/304182.304209},
  url       = {https://doi.org/10.1145/304182.304209},
  keywords  = {imported},
  abstract  = {Query processing in data integration occurs over network-bound, autonomous data sources. This requires extensions to traditional optimization and execution techniques for three reasons: there is an absence of quality statistics about the data, data transfer rates are unpredictable and bursty, and slow or unavailable data sources can often be replaced by overlapping or mirrored sources. This paper presents the Tukwila data integration system, designed to support adaptivity at its core using a two-pronged approach. Interleaved planning and execution with partial optimization allows Tukwila to quickly recover from decisions based on inaccurate estimates. During execution, Tukwila uses adaptive query operators such as the double pipelined hash join, which produces answers quickly, and the dynamic collector, which robustly and efficiently computes unions across overlapping data sources. We demonstrate that the Tukwila architecture extends previous innovations in adaptive execution (such as query scrambling, mid-execution re-optimization, and choose nodes), and we present experimental evidence that our techniques result in behavior desirable for a data integration system.},
  added-at  = {2008-12-28T21:33:49.000+0100},
  timestamp = {2008-12-30T11:03:21.000+0100},
  biburl    = {https://www.bibsonomy.org/bibtex/2eb3a6bb42106736eb88a5d4cf70dc8fa/achraf.elghazi},
  interhash = {bff5598db5ac2a8d2f18d0ecd7223724},
  intrahash = {eb3a6bb42106736eb88a5d4cf70dc8fa},
  comment   = {[todo]},
}