High-throughput DNA microarray provides an effective approach to the monitoring of expression levels of thousands of genes in a sample simultaneously. One promising application of this technology is the molecular diagnostics of cancer, e.g. to distinguish normal tissue from tumor or to classify tumors into different types or subtypes. One problem arising from the use of microarray data is how to analyze the high-dimensional gene expression data, typically with thousands of variables (genes) and much fewer observations (samples). There is a need to develop reliable classification methods to make full use of microarray data and to evaluate accurately the predictive ability and reliability of such derived models. In this paper, discriminant partial least squares was used to classify the different types of human tumors using four microarray datasets and showed good prediction performance. Four different cross-validation procedures (leave-one-out versus leave-half-out; incomplete versus full) were used to evaluate the classification model. Our results indicate that discriminant partial least squares using leave-half-out cross-validation provides a more realistic estimate of the predictive ability of a classification model, which may be overestimated by some of the cross-validation procedures, and the information obtained from different cross-validation procedures can be used to evaluate the reliability of the classification model.
Description
ScienceDirect - Computational Biology and Chemistry : Multi-class tumor classification by discriminant partial least squares using microarray gene expression data and assessment of classification models
%0 Journal Article
%1 Tan:2004
%A Tan, Yongxi
%A Shi, Leming
%A Tong, Weida
%A Hwang, G. T. Gene
%A Wang, Charles
%D 2004
%J Computational Biology and Chemistry
%K dimension-reduction discriminant discriminant-analysis high-dimensional high-dimensional-data pls statistics
%N 3
%P 235-243
%R 10.1016/j.compbiolchem.2004.05.002
%T Multi-class tumor classification by discriminant partial least squares using microarray gene expression data and assessment of classification models
%U http://www.sciencedirect.com/science/article/pii/S147692710400043X
%V 28
%X High-throughput DNA microarray provides an effective approach to the monitoring of expression levels of thousands of genes in a sample simultaneously. One promising application of this technology is the molecular diagnostics of cancer, e.g. to distinguish normal tissue from tumor or to classify tumors into different types or subtypes. One problem arising from the use of microarray data is how to analyze the high-dimensional gene expression data, typically with thousands of variables (genes) and much fewer observations (samples). There is a need to develop reliable classification methods to make full use of microarray data and to evaluate accurately the predictive ability and reliability of such derived models. In this paper, discriminant partial least squares was used to classify the different types of human tumors using four microarray datasets and showed good prediction performance. Four different cross-validation procedures (leave-one-out versus leave-half-out; incomplete versus full) were used to evaluate the classification model. Our results indicate that discriminant partial least squares using leave-half-out cross-validation provides a more realistic estimate of the predictive ability of a classification model, which may be overestimated by some of the cross-validation procedures, and the information obtained from different cross-validation procedures can be used to evaluate the reliability of the classification model.
@article{Tan:2004,
  abstract    = {High-throughput DNA microarray provides an effective approach to the monitoring of expression levels of thousands of genes in a sample simultaneously. One promising application of this technology is the molecular diagnostics of cancer, e.g. to distinguish normal tissue from tumor or to classify tumors into different types or subtypes. One problem arising from the use of microarray data is how to analyze the high-dimensional gene expression data, typically with thousands of variables (genes) and much fewer observations (samples). There is a need to develop reliable classification methods to make full use of microarray data and to evaluate accurately the predictive ability and reliability of such derived models. In this paper, discriminant partial least squares was used to classify the different types of human tumors using four microarray datasets and showed good prediction performance. Four different cross-validation procedures (leave-one-out versus leave-half-out; incomplete versus full) were used to evaluate the classification model. Our results indicate that discriminant partial least squares using leave-half-out cross-validation provides a more realistic estimate of the predictive ability of a classification model, which may be overestimated by some of the cross-validation procedures, and the information obtained from different cross-validation procedures can be used to evaluate the reliability of the classification model.},
  added-at    = {2011-10-20T01:04:44.000+0200},
  author      = {Tan, Yongxi and Shi, Leming and Tong, Weida and Hwang, G. T. Gene and Wang, Charles},
  biburl      = {https://www.bibsonomy.org/bibtex/29865189d356da472804fb79797b17cfd/vivion},
  description = {ScienceDirect - Computational Biology and Chemistry : Multi-class tumor classification by discriminant partial least squares using microarray gene expression data and assessment of classification models},
  doi         = {10.1016/j.compbiolchem.2004.05.002},
  interhash   = {87d48e0ee1750755d20ded2d11fdd45b},
  intrahash   = {9865189d356da472804fb79797b17cfd},
  issn        = {1476-9271},
  journal     = {Computational Biology and Chemistry},
  keywords    = {dimension-reduction discriminant discriminant-analysis high-dimensional high-dimensional-data pls statistics},
  number      = {3},
  pages       = {235--243},
  timestamp   = {2011-10-20T01:04:44.000+0200},
  title       = {Multi-class tumor classification by discriminant partial least squares using microarray gene expression data and assessment of classification models},
  url         = {http://www.sciencedirect.com/science/article/pii/S147692710400043X},
  volume      = {28},
  year        = {2004},
}