Many of the available RDF datasets describe millions of resources by using billions of triples. Consequently, millions of links can potentially exist among such datasets. While parallel implementations of link discovery approaches have been developed in the past, load balancing approaches for local implementations of link discovery algorithms have been paid little attention to. In this paper, we thus present a novel load balancing technique for link discovery on parallel hardware based on particle-swarm optimization. We combine this approach with the Orchid algorithm for geo-spatial linking and evaluate it on real and artificial datasets. Our evaluation suggests that while naïve approaches can be super-linear on small data sets, our deterministic particle swarm optimization outperforms both naïve and classical load balancing approaches such as greedy load balancing on large datasets.
%0 Conference Paper
%1 sherifDPSO
%A Sherif, Mohamed Ahmed
%A Ngonga Ngomo, Axel-Cyrille
%B SEMANTiCS 2015
%D 2015
%K 2015 SIMBA dice geoknow group_aksw limes ngonga sherif simba
%T An Optimization Approach for Load Balancing in Parallel Link Discovery
%U http://svn.aksw.org/papers/2015/SEMANTICS_DPSO/public.pdf
%X Many of the available RDF datasets describe millions of resources by using billions of triples. Consequently, millions of links can potentially exist among such datasets. While parallel implementations of link discovery approaches have been developed in the past, load balancing approaches for local implementations of link discovery algorithms have been paid little attention to. In this paper, we thus present a novel load balancing technique for link discovery on parallel hardware based on particle-swarm optimization. We combine this approach with the Orchid algorithm for geo-spatial linking and evaluate it on real and artificial datasets. Our evaluation suggests that while naïve approaches can be super-linear on small data sets, our deterministic particle swarm optimization outperforms both naïve and classical load balancing approaches such as greedy load balancing on large datasets.
@comment{
  sherifDPSO: conference paper (SEMANTiCS 2015) on load balancing for
  parallel link discovery.
  Bibliographic fields come first, then export metadata. The fields
  added-at, biburl, interhash, intrahash and timestamp appear to be
  BibSonomy export metadata (biburl points at bibsonomy.org), and
  bdsk-url-1 appears to be a BibDesk link field; confirm before pruning.
  bdsk-url-1 holds a raw URL, so it must not carry LaTeX escapes.
}
@inproceedings{sherifDPSO,
  author     = {Sherif, Mohamed Ahmed and {Ngonga Ngomo}, Axel-Cyrille},
  title      = {An Optimization Approach for Load Balancing in Parallel Link Discovery},
  booktitle  = {{SEMANTiCS} 2015},
  year       = {2015},
  url        = {http://svn.aksw.org/papers/2015/SEMANTICS_DPSO/public.pdf},
  bdsk-url-1 = {http://svn.aksw.org/papers/2015/SEMANTICS_DPSO/public.pdf},
  keywords   = {2015 SIMBA dice geoknow group\_aksw limes ngonga sherif simba},
  abstract   = {Many of the available RDF datasets describe millions of resources by using billions of triples. Consequently, millions of links can potentially exist among such datasets. While parallel implementations of link discovery approaches have been developed in the past, load balancing approaches for local implementations of link discovery algorithms have been paid little attention to. In this paper, we thus present a novel load balancing technique for link discovery on parallel hardware based on particle-swarm optimization. We combine this approach with the Orchid algorithm for geo-spatial linking and evaluate it on real and artificial datasets. Our evaluation suggests that while na{\"\i}ve approaches can be super-linear on small data sets, our deterministic particle swarm optimization outperforms both na{\"\i}ve and classical load balancing approaches such as greedy load balancing on large datasets.},
  added-at   = {2020-06-18T14:14:51.000+0200},
  biburl     = {https://www.bibsonomy.org/bibtex/24e24de80f2abe04d810acf764cbfa4f7/dice-research},
  interhash  = {be3614a3105c61c98caef901e840d87d},
  intrahash  = {4e24de80f2abe04d810acf764cbfa4f7},
  timestamp  = {2023-04-25T16:34:03.000+0200},
}