We study two randomized algorithms for generalized linear bandits, GLM-TSL and GLM-FPL. GLM-TSL samples a generalized linear model (GLM) from the Laplace approximation to the posterior distribution. GLM-FPL fits a GLM to a randomly perturbed history of past rewards. We prove C d (n log K)^(1/2) bounds (up to log factors) on the n-round regret of GLM-TSL and GLM-FPL, where d is the number of features and K is the number of arms. The regret bound of GLM-TSL improves upon prior work and the regret bound of GLM-FPL is the first of its kind. We apply both GLM-TSL and GLM-FPL to logistic and neural network bandits, and show that they perform well empirically. In more complex models, GLM-FPL is significantly faster. Our results showcase the role of randomization, beyond sampling from the posterior, in exploration.
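For readers who want to connect the abstract's description to code, below is a minimal Python sketch of the per-round decisions of the two algorithms in the logistic case: GLM-TSL samples a parameter from a Laplace approximation N(theta_hat, a^2 H^{-1}) around a regularized MLE, while GLM-FPL refits the GLM to a history whose rewards are perturbed by Gaussian noise. The function names, the Newton solver, the regularizer lam, and the scale parameters a and sigma are illustrative assumptions, not taken from the paper.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def fit_logistic(X, y, lam=1.0, iters=25):
    # Regularized logistic-regression fit via Newton's method.
    # Returns the estimate and the Hessian at the estimate.
    d = X.shape[1]
    theta = np.zeros(d)
    for _ in range(iters):
        p = sigmoid(X @ theta)
        g = X.T @ (p - y) + lam * theta                               # gradient
        H = X.T @ (X * (p * (1 - p))[:, None]) + lam * np.eye(d)      # Hessian
        theta -= np.linalg.solve(H, g)
    p = sigmoid(X @ theta)
    H = X.T @ (X * (p * (1 - p))[:, None]) + lam * np.eye(d)
    return theta, H

def glm_tsl_choose(X_hist, y_hist, arms, a=1.0, lam=1.0, rng=np.random):
    # GLM-TSL-style step: sample a parameter from the Laplace approximation
    # N(theta_hat, a^2 H^{-1}) and act greedily with respect to it.
    theta_hat, H = fit_logistic(X_hist, y_hist, lam)
    theta_tilde = rng.multivariate_normal(theta_hat, a ** 2 * np.linalg.inv(H))
    return int(np.argmax(arms @ theta_tilde))

def glm_fpl_choose(X_hist, y_hist, arms, sigma=0.5, lam=1.0, rng=np.random):
    # GLM-FPL-style step: fit the GLM to a randomly perturbed reward history,
    # then act greedily with respect to the perturbed fit.
    y_pert = y_hist + sigma * rng.standard_normal(len(y_hist))
    theta_tilde, _ = fit_logistic(X_hist, y_pert, lam)
    return int(np.argmax(arms @ theta_tilde))

# Toy usage (illustrative only): K = 10 arms with d = 3 features.
# arms = np.random.standard_normal((10, 3))
# X_hist, y_hist = arms[[0, 1, 2]], np.array([1.0, 0.0, 1.0])
# k = glm_fpl_choose(X_hist, y_hist, arms)

Both routines reduce exploration to randomizing the fitted parameter, which is the role of randomization beyond posterior sampling that the abstract highlights.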
%0 Conference Paper
%1 KZSZLGB20
%A Kveton, B.
%A Zaheer, M.
%A Szepesvári, Cs.
%A Li, L.
%A Ghavamzadeh, M.
%A Boutilier, C.
%B AISTATS
%D 2020
%K Thompson sampling, stochastic bandits, finite-armed bandits, generalized linear bandits, follow-the-perturbed-leader, randomization
%T Randomized Exploration in Generalized Linear Bandits
%U https://arxiv.org/abs/1906.08947
%X We study two randomized algorithms for generalized linear bandits, GLM-TSL and GLM-FPL. GLM-TSL samples a generalized linear model (GLM) from the Laplace approximation to the posterior distribution. GLM-FPL fits a GLM to a randomly perturbed history of past rewards. We prove C d (n log K)^(1/2) bounds (up to log factors) on the n-round regret of GLM-TSL and GLM-FPL, where d is the number of features and K is the number of arms. The regret bound of GLM-TSL improves upon prior work and the regret bound of GLM-FPL is the first of its kind. We apply both GLM-TSL and GLM-FPL to logistic and neural network bandits, and show that they perform well empirically. In more complex models, GLM-FPL is significantly faster. Our results showcase the role of randomization, beyond sampling from the posterior, in exploration.
@inproceedings{KZSZLGB20,
abstract = {We study two randomized algorithms for generalized linear bandits, GLM-TSL and GLM-FPL. GLM-TSL samples a generalized linear model (GLM) from the Laplace approximation to the posterior distribution. GLM-FPL fits a GLM to a randomly perturbed history of past rewards. We prove C d (n log K)^(1/2) bounds (up to log factors) on the n-round regret of GLM-TSL and GLM-FPL, where d is the number of features and K is the number of arms. The regret bound of GLM-TSL improves upon prior work and the regret bound of GLM-FPL is the first of its kind. We apply both GLM-TSL and GLM-FPL to logistic and neural network bandits, and show that they perform well empirically. In more complex models, GLM-FPL is significantly faster. Our results showcase the role of randomization, beyond sampling from the posterior, in exploration. },
added-at = {2020-03-17T03:03:01.000+0100},
author = {Kveton, B. and Zaheer, M. and {Sz}epesv{\'a}ri, {Cs}. and Li, L. and Ghavamzadeh, M. and Boutilier, C.},
biburl = {https://www.bibsonomy.org/bibtex/256a8d465038ac0b57911468a201ffdc4/csaba},
booktitle = {AISTATS},
date-added = {2020-03-07 14:31:44 -0700},
date-modified = {2020-03-07 14:56:07 -0700},
interhash = {7d26b80b4590b53a68d128f32462abf1},
intrahash = {56a8d465038ac0b57911468a201ffdc4},
keywords = {Thompson sampling, stochastic bandits, finite-armed bandits, generalized linear bandits, follow-the-perturbed-leader, randomization},
month = {March},
pdf = {papers/AISTATS2020-GLB.pdf},
timestamp = {2020-03-17T03:03:01.000+0100},
title = {Randomized Exploration in Generalized Linear Bandits},
url = {https://arxiv.org/abs/1906.08947},
year = 2020
}