Technologies for profiling samples using different omics platforms have been at the forefront since the human genome project. Large-scale multi-omics data hold the promise of deciphering different regulatory layers. Yet, while there is a myriad of bioinformatics tools, each multi-omics analysis appears to start from scratch with an arbitrary decision over which tools to use and how to combine them. It is therefore an unmet need to conceptualize how to integrate such data and to implement and validate pipelines in different cases. We have designed a conceptual framework (STATegra), aiming it to be as generic as possible for multi-omics analysis, combining machine learning component analysis, non-parametric data combination and a multi-omics exploratory analysis in a step-wise manner. While in several studies we have previously combined those integrative tools, here we provide a systematic description of the STATegra framework and its validation using two TCGA case studies. For both, the Glioblastoma and the Skin Cutaneous Melanoma cases, we demonstrate an enhanced capacity to identify features in comparison to single-omics analysis. Such an integrative multi-omics analysis framework for the identification of features and components facilitates the discovery of new biology. Finally, we provide several options for applying the STATegra framework when parametric assumptions are fulfilled, and for the case when not all the samples are profiled for all omics. The STATegra framework is built using several tools, which are being integrated step-by-step as OpenSource in the STATegRa Bioconductor package https://bioconductor.org/packages/release/bioc/html/STATegra.html.
%0 Journal Article
%1 noauthororeditor
%A Phanell, Nuria
%A Lagani, Vincenzo
%A Sebastian-Leon, Patricia
%A Van der Kloet, Frans
%A Ewing, Ewoud
%A Karathanasis, Nestoras
%A Urdangarin, Arantxa
%A Arozarena, Imanol
%A Jagodic, Maja
%A Tsamardinos, Ioannis
%A Tarazona, Sonia
%A Conesa, Ana
%A Tegner, Jesper
%A Gomez-Cabrero, David
%D 2020
%J Frontiers in Genetics
%K data multi-omics
%R 10.1101/2020.11.20.391045
%T STATegra: Multi-omics data integration - A conceptual scheme and a bioinformatics pipeline
%U https://www.biorxiv.org/content/10.1101/2020.11.20.391045v1
%Z To appear
%X Technologies for profiling samples using different omics platforms have been at the forefront since the human genome project. Large-scale multi-omics data hold the promise of deciphering different regulatory layers. Yet, while there is a myriad of bioinformatics tools, each multi-omics analysis appears to start from scratch with an arbitrary decision over which tools to use and how to combine them. It is therefore an unmet need to conceptualize how to integrate such data and to implement and validate pipelines in different cases. We have designed a conceptual framework (STATegra), aiming it to be as generic as possible for multi-omics analysis, combining machine learning component analysis, non-parametric data combination and a multi-omics exploratory analysis in a step-wise manner. While in several studies we have previously combined those integrative tools, here we provide a systematic description of the STATegra framework and its validation using two TCGA case studies. For both, the Glioblastoma and the Skin Cutaneous Melanoma cases, we demonstrate an enhanced capacity to identify features in comparison to single-omics analysis. Such an integrative multi-omics analysis framework for the identification of features and components facilitates the discovery of new biology. Finally, we provide several options for applying the STATegra framework when parametric assumptions are fulfilled, and for the case when not all the samples are profiled for all omics. The STATegra framework is built using several tools, which are being integrated step-by-step as OpenSource in the STATegRa Bioconductor package https://bioconductor.org/packages/release/bioc/html/STATegra.html.
@article{noauthororeditor,
  author        = {Phanell, Nuria and Lagani, Vincenzo and Sebastian-Leon, Patricia and Van der Kloet, Frans and Ewing, Ewoud and Karathanasis, Nestoras and Urdangarin, Arantxa and Arozarena, Imanol and Jagodic, Maja and Tsamardinos, Ioannis and Tarazona, Sonia and Conesa, Ana and Tegner, Jesper and Gomez-Cabrero, David},
  title         = {{STATegra}: Multi-omics data integration - A conceptual scheme and a bioinformatics pipeline},
  journal       = {Frontiers in Genetics},
  year          = {2020},
  note          = {To appear},
  doi           = {10.1101/2020.11.20.391045},
  url           = {https://www.biorxiv.org/content/10.1101/2020.11.20.391045v1},
  keywords      = {data multi-omics},
  abstract      = {Technologies for profiling samples using different omics platforms have been at the forefront since the human genome project. Large-scale multi-omics data hold the promise of deciphering different regulatory layers. Yet, while there is a myriad of bioinformatics tools, each multi-omics analysis appears to start from scratch with an arbitrary decision over which tools to use and how to combine them. It is therefore an unmet need to conceptualize how to integrate such data and to implement and validate pipelines in different cases. We have designed a conceptual framework (STATegra), aiming it to be as generic as possible for multi-omics analysis, combining machine learning component analysis, non-parametric data combination and a multi-omics exploratory analysis in a step-wise manner. While in several studies we have previously combined those integrative tools, here we provide a systematic description of the STATegra framework and its validation using two TCGA case studies. For both, the Glioblastoma and the Skin Cutaneous Melanoma cases, we demonstrate an enhanced capacity to identify features in comparison to single-omics analysis. Such an integrative multi-omics analysis framework for the identification of features and components facilitates the discovery of new biology. Finally, we provide several options for applying the STATegra framework when parametric assumptions are fulfilled, and for the case when not all the samples are profiled for all omics. The STATegra framework is built using several tools, which are being integrated step-by-step as OpenSource in the STATegRa Bioconductor package https://bioconductor.org/packages/release/bioc/html/STATegra.html.},
  internal-note = {Ignored annotation field. The citation key is the exporter's placeholder and is kept unchanged so existing citations still resolve. The status text formerly stored in the volume field now lives in note. The doi field holds the bare identifier without a resolver prefix; it is the same identifier that appears in the bioRxiv url. NOTE(review): the first author's surname may be misspelled -- confirm against the DOI record before publishing.},
  added-at      = {2021-01-25T08:02:51.000+0100},
  timestamp     = {2021-01-25T08:02:51.000+0100},
  biburl        = {https://www.bibsonomy.org/bibtex/213d5658c490ee48b134629c33979e700/mensxmachina},
  interhash     = {84dd53162ecf2659ffb75f1329f0aaad},
  intrahash     = {13d5658c490ee48b134629c33979e700},
}