This paper describes the outcome of an attempt to implement the same transitive closure (TC) algorithm
for Apache MapReduce running on different Apache Hadoop distributions. Apache MapReduce is a
software framework used with Apache Hadoop, which has become the de facto standard platform for
processing and storing large amounts of data in a distributed computing environment. The research
presented here focuses on the variations observed among the results of an efficient iterative transitive
closure algorithm when run against different distributed environments. The results from these comparisons
were validated against the benchmark results from OYSTER, an open source Entity Resolution system. The
experiment results highlighted the inconsistencies that can occur when using the same codebase with
different implementations of Map Reduce.
%0 Journal Article
%1 noauthororeditor
%A Parmar, Purvi
%A Morris, MaryEtta
%A Talburt, John R.
%A Syed, Huzaifa F.
%D 2020
%J International Journal of Computer Science & Information Technology (IJCSIT)
%K engineering information
%N 4
%P 27-34
%R 10.5121/ijcsit.2020.12403
%T Variations in Outcome for the Same Map Reduce Transitive Closure Algorithm Implemented on Different Hadoop Platforms
%U https://aircconline.com/ijcsit/V12N4/12420ijcsit03.pdf
%V 12
%X This paper describes the outcome of an attempt to implement the same transitive closure (TC) algorithm
for Apache MapReduce running on different Apache Hadoop distributions. Apache MapReduce is a
software framework used with Apache Hadoop, which has become the de facto standard platform for
processing and storing large amounts of data in a distributed computing environment. The research
presented here focuses on the variations observed among the results of an efficient iterative transitive
closure algorithm when run against different distributed environments. The results from these comparisons
were validated against the benchmark results from OYSTER, an open source Entity Resolution system. The
experiment results highlighted the inconsistencies that can occur when using the same codebase with
different implementations of Map Reduce.
@article{noauthororeditor,
  abstract = {This paper describes the outcome of an attempt to implement the same transitive closure (TC) algorithm
for Apache MapReduce running on different Apache Hadoop distributions. Apache MapReduce is a
software framework used with Apache Hadoop, which has become the de facto standard platform for
processing and storing large amounts of data in a distributed computing environment. The research
presented here focuses on the variations observed among the results of an efficient iterative transitive
closure algorithm when run against different distributed environments. The results from these comparisons
were validated against the benchmark results from OYSTER, an open source Entity Resolution system. The
experiment results highlighted the inconsistencies that can occur when using the same codebase with
different implementations of Map Reduce.},
  added-at = {2020-09-15T09:14:23.000+0200},
  author = {Parmar, Purvi and Morris, MaryEtta and Talburt, John R. and Syed, Huzaifa F.},
  biburl = {https://www.bibsonomy.org/bibtex/299cfc69928049c4007362d050e76f48d/shamerjose},
  doi = {10.5121/ijcsit.2020.12403},
  interhash = {9ff325a4468c2a035a68dbee8f34d1f7},
  intrahash = {99cfc69928049c4007362d050e76f48d},
  journal = {International Journal of Computer Science \& Information Technology (IJCSIT)},
  keywords = {engineering information},
  month = aug,
  number = 4,
  pages = {27--34},
  timestamp = {2020-09-15T09:14:23.000+0200},
  title = {Variations in Outcome for the Same {Map Reduce} Transitive Closure Algorithm Implemented on Different {Hadoop} Platforms},
  url = {https://aircconline.com/ijcsit/V12N4/12420ijcsit03.pdf},
  volume = 12,
  year = 2020
}