The amount of bio-medical data available on the Web grows exponentially with time. The resulting large
volume of data makes the manual exploration of this data very tedious. Moreover, the velocity at which this
data changes and the variety of formats in which bio-medical data is published makes it difficult to access them
in an integrated form. Finally, the lack of an integrated vocabulary makes querying this data more difficult.
In this paper, we advocate the use of Linked Data to integrate, query and visualize bio-medical data. The
resulting Big Linked Data allows discovering knowledge distributed across manifold sources, making it viable
for the serendipitous discovery of novel knowledge. We display the concept of Big Linked Data by showing
how the constant stream of new bio-medical publications can be integrated with 7.36 billion triples from the
Linked Cancer Genome Atlas dataset (TCGA) within a virtual integration scenario. Then, we show how
we can harness the value hidden in the underlying integrated data by making it easier to explore through a
user-friendly interface. Further, we demonstrate the scalability of our approach by presenting and evaluating
the novel TopFed federated query engine. The evaluation is achieved by comparing the query execution time
of our system with that of FedX on Linked TCGA.
%0 Journal Article
%1 saleem2014ltcgapubmed
%A Saleem, Muhammad
%A Kamdar, Maulik
%A Iqbal, Aftab
%A Sampath, Shanmukha
%A Deus, Helena
%A Ngonga Ngomo, Axel-Cyrille
%D 2014
%E of Web Semantics, Journal
%J Journal of Web Semantics
%Z Under review
%K big data saleem
%T Big Linked Cancer Data: Integrating Linked TCGA and PubMed
%U http://svn.aksw.org/papers/2014/JWS_TCGA_Pubmed_Integration/public.pdf
%X The amount of bio-medical data available on the Web grows exponentially with time. The resulting large
volume of data makes the manual exploration of this data very tedious. Moreover, the velocity at which this
data changes and the variety of formats in which bio-medical data is published makes it difficult to access them
in an integrated form. Finally, the lack of an integrated vocabulary makes querying this data more difficult.
In this paper, we advocate the use of Linked Data to integrate, query and visualize bio-medical data. The
resulting Big Linked Data allows discovering knowledge distributed across manifold sources, making it viable
for the serendipitous discovery of novel knowledge. We display the concept of Big Linked Data by showing
how the constant stream of new bio-medical publications can be integrated with 7.36 billion triples from the
Linked Cancer Genome Atlas dataset (TCGA) within a virtual integration scenario. Then, we show how
we can harness the value hidden in the underlying integrated data by making it easier to explore through a
user-friendly interface. Further, we demonstrate the scalability of our approach by presenting and evaluating
the novel TopFed federated query engine. The evaluation is achieved by comparing the query execution time
of our system with that of FedX on Linked TCGA.
@comment{saleem2014ltcgapubmed: journal submission recorded in 2014 while still under review. The review status is kept in `note` so that `journal` holds only the venue name.}
@article{saleem2014ltcgapubmed,
  abstract  = {The amount of bio-medical data available on the Web grows exponentially with time. The resulting large
volume of data makes the manual exploration of this data very tedious. Moreover, the velocity at which this
data changes and the variety of formats in which bio-medical data is published makes it difficult to access them
in an integrated form. Finally, the lack of an integrated vocabulary makes querying this data more difficult.
In this paper, we advocate the use of Linked Data to integrate, query and visualize bio-medical data. The
resulting Big Linked Data allows discovering knowledge distributed across manifold sources, making it viable
for the serendipitous discovery of novel knowledge. We display the concept of Big Linked Data by showing
how the constant stream of new bio-medical publications can be integrated with 7.36 billion triples from the
Linked Cancer Genome Atlas dataset (TCGA) within a virtual integration scenario. Then, we show how
we can harness the value hidden in the underlying integrated data by making it easier to explore through a
user-friendly interface. Further, we demonstrate the scalability of our approach by presenting and evaluating
the novel TopFed federated query engine. The evaluation is achieved by comparing the query execution time
of our system with that of FedX on Linked TCGA.},
  added-at  = {2014-03-20T11:30:54.000+0100},
  author    = {Saleem, Muhammad and Kamdar, Maulik and Iqbal, Aftab and Sampath, Shanmukha and Deus, Helena and Ngonga Ngomo, Axel-Cyrille},
  biburl    = {https://www.bibsonomy.org/bibtex/2343dd9072d708e36366a2fbf13e1a9a5/msaleem},
  interhash = {bd381110de4d2bd34ba98e8fa2010dfd},
  intrahash = {343dd9072d708e36366a2fbf13e1a9a5},
  journal   = {Journal of Web Semantics},
  keywords  = {big data saleem},
  note      = {Under review},
  timestamp = {2014-03-20T11:32:08.000+0100},
  title     = {Big Linked Cancer Data: Integrating Linked {TCGA} and {PubMed}},
  url       = {http://svn.aksw.org/papers/2014/JWS_TCGA_Pubmed_Integration/public.pdf},
  year      = {2014},
}