Linear mixed models are a powerful statistical tool for identifying genetic associations and avoiding confounding. However, existing methods are computationally intractable in large cohorts and may not optimize power. All existing methods require time cost O(MN^2) (where N is the number of samples and M is the number of SNPs) and implicitly assume an infinitesimal genetic architecture in which effect sizes are normally distributed, which can limit power. Here we present a far more efficient mixed-model association method, BOLT-LMM, which requires only a small number of O(MN) time iterations and increases power by modeling more realistic, non-infinitesimal genetic architectures via a Bayesian mixture prior on marker effect sizes. We applied BOLT-LMM to 9 quantitative traits in 23,294 samples from the Women's Genome Health Study (WGHS) and observed significant increases in power, consistent with simulations. Theory and simulations show that the boost in power increases with cohort size, making BOLT-LMM appealing for genome-wide association studies in large cohorts.
%0 Journal Article
%1 loh2015efficient
%A Loh, P R
%A Tucker, G
%A Bulik-Sullivan, B K
%A Vilhjálmsson, B J
%A Finucane, H K
%A Salem, R M
%A Chasman, D I
%A Ridker, P M
%A Neale, B M
%A Berger, B
%A Patterson, N
%A Price, A L
%D 2015
%J Nat Genet
%K GWAS LMM methods mixed_models
%N 3
%P 284-290
%R 10.1038/ng.3190
%T Efficient Bayesian mixed-model analysis increases association power in large cohorts
%U https://www.ncbi.nlm.nih.gov/pubmed/25642633
%V 47
%X Linear mixed models are a powerful statistical tool for identifying genetic associations and avoiding confounding. However, existing methods are computationally intractable in large cohorts and may not optimize power. All existing methods require time cost O(MN^2) (where N is the number of samples and M is the number of SNPs) and implicitly assume an infinitesimal genetic architecture in which effect sizes are normally distributed, which can limit power. Here we present a far more efficient mixed-model association method, BOLT-LMM, which requires only a small number of O(MN) time iterations and increases power by modeling more realistic, non-infinitesimal genetic architectures via a Bayesian mixture prior on marker effect sizes. We applied BOLT-LMM to 9 quantitative traits in 23,294 samples from the Women's Genome Health Study (WGHS) and observed significant increases in power, consistent with simulations. Theory and simulations show that the boost in power increases with cohort size, making BOLT-LMM appealing for genome-wide association studies in large cohorts.
@article{loh2015efficient,
  abstract   = {Linear mixed models are a powerful statistical tool for identifying genetic associations and avoiding confounding. However, existing methods are computationally intractable in large cohorts and may not optimize power. All existing methods require time cost $O(MN^2)$ (where N is the number of samples and M is the number of SNPs) and implicitly assume an infinitesimal genetic architecture in which effect sizes are normally distributed, which can limit power. Here we present a far more efficient mixed-model association method, BOLT-LMM, which requires only a small number of $O(MN)$ time iterations and increases power by modeling more realistic, non-infinitesimal genetic architectures via a Bayesian mixture prior on marker effect sizes. We applied BOLT-LMM to 9 quantitative traits in 23,294 samples from the Women's Genome Health Study (WGHS) and observed significant increases in power, consistent with simulations. Theory and simulations show that the boost in power increases with cohort size, making BOLT-LMM appealing for genome-wide association studies in large cohorts.},
  added-at   = {2018-08-22T23:39:54.000+0200},
  author     = {Loh, P R and Tucker, G and Bulik-Sullivan, B K and Vilhj{\'a}lmsson, B J and Finucane, H K and Salem, R M and Chasman, D I and Ridker, P M and Neale, B M and Berger, B and Patterson, N and Price, A L},
  biburl     = {https://www.bibsonomy.org/bibtex/25d00d84813060199c868dc0ba8179502/peter.ralph},
  doi        = {10.1038/ng.3190},
  interhash  = {df234a1489b41c52816d9f05b3bc9cc9},
  intrahash  = {5d00d84813060199c868dc0ba8179502},
  journal    = {Nature Genetics},
  keywords   = {GWAS LMM methods mixed_models},
  month      = mar,
  number     = 3,
  pages      = {284--290},
  pmid       = {25642633},
  timestamp  = {2018-08-22T23:39:54.000+0200},
  title      = {Efficient {Bayesian} mixed-model analysis increases association power in large cohorts},
  url        = {https://www.ncbi.nlm.nih.gov/pubmed/25642633},
  volume     = 47,
  year       = 2015
}