We consider a sequential learning problem with Gaussian payoffs and side observations: after selecting an action i, the learner
receives information about the payoff of every action j in the form of Gaussian observations whose mean is the same as the mean payoff, but the variance depends on the pair (i,j) (and may be infinite). The setup allows a more refined information transfer from one action to another than previous partial monitoring setups, including the recently introduced graph-structured feedback case. For the first time in the literature, we provide non-asymptotic problem-dependent lower bounds on the regret of any algorithm, which recover existing asymptotic problem-dependent lower bounds and finite-time minimax lower bounds available in the literature. We also provide algorithms that achieve the problem-dependent lower bound (up to some universal constant factor) or the minimax lower bounds (up to logarithmic factors).
%0 Conference Paper
%1 WGySz:NIPS15
%A Wu, Y.
%A György, A.
%A Szepesvári, Cs.
%B NIPS
%D 2015
%K asymptotic optimality, finite-sample bounds, learning with partial information, minimax optimality, online learning, side-observations
%P 1360--1368
%T Online Learning with Gaussian Payoffs and Side Observations
%X We consider a sequential learning problem with Gaussian payoffs and side observations: after selecting an action i, the learner
receives information about the payoff of every action j in the form of Gaussian observations whose mean is the same as the mean payoff, but the variance depends on the pair (i,j) (and may be infinite). The setup allows a more refined information transfer from one action to another than previous partial monitoring setups, including the recently introduced graph-structured feedback case. For the first time in the literature, we provide non-asymptotic problem-dependent lower bounds on the regret of any algorithm, which recover existing asymptotic problem-dependent lower bounds and finite-time minimax lower bounds available in the literature. We also provide algorithms that achieve the problem-dependent lower bound (up to some universal constant factor) or the minimax lower bounds (up to logarithmic factors).
@inproceedings{WGySz:NIPS15,
  abstract      = {We consider a sequential learning problem with Gaussian payoffs and side observations: after selecting an action i, the learner receives information about the payoff of every action j in the form of Gaussian observations whose mean is the same as the mean payoff, but the variance depends on the pair (i,j) (and may be infinite). The setup allows a more refined information transfer from one action to another than previous partial monitoring setups, including the recently introduced graph-structured feedback case. For the first time in the literature, we provide non-asymptotic problem-dependent lower bounds on the regret of any algorithm, which recover existing asymptotic problem-dependent lower bounds and finite-time minimax lower bounds available in the literature. We also provide algorithms that achieve the problem-dependent lower bound (up to some universal constant factor) or the minimax lower bounds (up to logarithmic factors).},
  added-at      = {2020-03-17T03:03:01.000+0100},
  author        = {Wu, Yifan and Gy{\"o}rgy, Andr{\'a}s and Szepesv{\'a}ri, Csaba},
  biburl        = {https://www.bibsonomy.org/bibtex/2203cc46f142bb7b2e82fa5271a98d034/csaba},
  booktitle     = {Advances in Neural Information Processing Systems 28 ({NIPS})},
  date-added    = {2015-12-02 01:36:54 +0000},
  date-modified = {2016-08-16 23:22:40 +0000},
  interhash     = {7f7aba02c74ef96c970b487e523d6bbe},
  intrahash     = {203cc46f142bb7b2e82fa5271a98d034},
  keywords      = {asymptotic optimality, finite-sample bounds, learning with partial information, minimax optimality, online learning, side-observations},
  month         = dec,
  pages         = {1360--1368},
  pdf           = {papers/NIPS15-SideObs.pdf},
  timestamp     = {2020-03-17T03:03:01.000+0100},
  title         = {Online Learning with {Gaussian} Payoffs and Side Observations},
  year          = {2015}
}