The paper presents effective and mathematically exact procedures for selection of variables which are applicable in cases with a very high dimension as, for example, in gene expression analysis. Choosing sets of variables is an important method to increase the power of the statistical conclusions and to facilitate the biological interpretation. For the construction of sets, each single variable is considered as the centre of potential sets of variables. Testing for significance is carried out by means of the Westfall-Young principle based on resampling or by the parametric method of spherical tests. The particular requirements for statistical stability are taken into account; each kind of overfitting is avoided. Thus, high power is attained and the familywise type I error can be kept in spite of the large dimension. To obtain graphical representations by heat maps and curves, a specific data compression technique is applied. Gene expression data from B-cell lymphoma patients serve for the demonstration of the procedures.
%0 Journal Article
%1 Lauter.2009
%A Läuter, Jürgen
%A Horn, Friedemann
%A Rosołowski, Maciej
%A Glimm, Ekkehard
%D 2009
%J Biometrical journal. Biometrische Zeitschrift
%K Algorithms Gene_Expression_Profiling/methods Humans Lymphoma,_B-Cell/metabolism Models,_Statistical Multivariate_Analysis Oligonucleotide_Array_Sequence_Analysis/methods Software
%N 2
%P 235–251
%T High-dimensional data analysis: selection of variables, data compression and graphics--application to gene expression
%V 51
%X The paper presents effective and mathematically exact procedures for selection of variables which are applicable in cases with a very high dimension as, for example, in gene expression analysis. Choosing sets of variables is an important method to increase the power of the statistical conclusions and to facilitate the biological interpretation. For the construction of sets, each single variable is considered as the centre of potential sets of variables. Testing for significance is carried out by means of the Westfall-Young principle based on resampling or by the parametric method of spherical tests. The particular requirements for statistical stability are taken into account; each kind of overfitting is avoided. Thus, high power is attained and the familywise type I error can be kept in spite of the large dimension. To obtain graphical representations by heat maps and curves, a specific data compression technique is applied. Gene expression data from B-cell lymphoma patients serve for the demonstration of the procedures.
% BibTeX record for the 2009 journal article by Lauter, Horn, Rosolowski and Glimm.
% Accented letters in the author names are written as LaTeX escapes so the entry
% sorts and typesets correctly under classic 8-bit BibTeX as well as Biber.
% The page range uses the "--" ligature; the dash in the title is an em dash ("---").
@article{Lauter.2009,
  abstract  = {The paper presents effective and mathematically exact procedures for selection of variables which are applicable in cases with a very high dimension as, for example, in gene expression analysis. Choosing sets of variables is an important method to increase the power of the statistical conclusions and to facilitate the biological interpretation. For the construction of sets, each single variable is considered as the centre of potential sets of variables. Testing for significance is carried out by means of the Westfall-Young principle based on resampling or by the parametric method of spherical tests. The particular requirements for statistical stability are taken into account; each kind of overfitting is avoided. Thus, high power is attained and the familywise type I error can be kept in spite of the large dimension. To obtain graphical representations by heat maps and curves, a specific data compression technique is applied. Gene expression data from B-cell lymphoma patients serve for the demonstration of the procedures.},
  added-at  = {2014-10-15T15:04:09.000+0200},
  author    = {L{\"a}uter, J{\"u}rgen and Horn, Friedemann and Roso{\l}owski, Maciej and Glimm, Ekkehard},
  biburl    = {https://www.bibsonomy.org/bibtex/2baa2c7a4832ffa02df4f67f5776bf338/drtester},
  interhash = {1a58471f721bf3f54bac4cca3a58081e},
  intrahash = {baa2c7a4832ffa02df4f67f5776bf338},
  journal   = {Biometrical journal. Biometrische Zeitschrift},
  keywords  = {Algorithms Gene_Expression_Profiling/methods Humans Lymphoma,_B-Cell/metabolism Models,_Statistical Multivariate_Analysis Oligonucleotide_Array_Sequence_Analysis/methods Software},
  number    = {2},
  pages     = {235--251},
  timestamp = {2014-10-15T15:04:09.000+0200},
  title     = {High-dimensional data analysis: selection of variables, data compression and graphics---application to gene expression},
  volume    = {51},
  year      = {2009}
}