Online learning with delayed feedback has received increasing attention recently due to its several applications in distributed, web-based learning problems. In this paper we provide a systematic study of the topic, and analyze how the delay effects the regret of online learning algorithms. Somewhat surprisingly, it turns out that delay increases the regret in a multiplicative way in adversarial problems, and in an additive way in stochastic problems. We give meta-algorithms that transform, in a black-box fashion, algorithms developed for the non-delayed case into ones that can handle the presence of delays in the feedback loop. Modifications of the well-known UCB algorithm are also developed for the bandit problem with delayed feedback, with the advantage over the meta-algorithms that they can be implemented with much lower complexity.
%0 Conference Paper
%1 JoGySz15
%A Joulani, P.
%A György, A.
%A Szepesvári, Cs.
%B IJCAI
%D 2015
%K analysis, big bounds complexity computation, cross-validation, data, performance theory,
%P 3597--3604
%T Fast Cross-Validation for Incremental Learning
%X Online learning with delayed feedback has received increasing attention recently due to its several applications in distributed, web-based learning problems. In this paper we provide a systematic study of the topic, and analyze how the delay effects the regret of online learning algorithms. Somewhat surprisingly, it turns out that delay increases the regret in a multiplicative way in adversarial problems, and in an additive way in stochastic problems. We give meta-algorithms that transform, in a black-box fashion, algorithms developed for the non-delayed case into ones that can handle the presence of delays in the feedback loop. Modifications of the well-known UCB algorithm are also developed for the bandit problem with delayed feedback, with the advantage over the meta-algorithms that they can be implemented with much lower complexity.
@inproceedings{JoGySz15,
abstract = {Online learning with delayed feedback has received increasing attention recently due to its several applications in distributed, web-based learning problems. In this paper we provide a systematic study of the topic, and analyze how the delay effects the regret of online learning algorithms. Somewhat surprisingly, it turns out that delay increases the regret in a multiplicative way in adversarial problems, and in an additive way in stochastic problems. We give meta-algorithms that transform, in a black-box fashion, algorithms developed for the non-delayed case into ones that can handle the presence of delays in the feedback loop. Modifications of the well-known UCB algorithm are also developed for the bandit problem with delayed feedback, with the advantage over the meta-algorithms that they can be implemented with much lower complexity.},
added-at = {2020-03-17T03:03:01.000+0100},
author = {Joulani, P. and Gy{\"o}rgy, A. and Szepesv{\'a}ri, {Cs}.},
biburl = {https://www.bibsonomy.org/bibtex/2a26eae6056f602ba7970c33436e358f3/csaba},
booktitle = {IJCAI},
date-added = {2015-04-17 16:16:35 +0000},
date-modified = {2015-08-02 00:45:23 +0000},
interhash = {64868dd5aee71a7156844f8cf712bf02},
intrahash = {a26eae6056f602ba7970c33436e358f3},
keywords = {analysis, big bounds complexity computation, cross-validation, data, performance theory,},
month = {April},
pages = {3597--3604},
pdf = {papers/tree-cv.pdf},
timestamp = {2020-03-17T03:03:01.000+0100},
title = {Fast Cross-Validation for Incremental Learning},
year = 2015
}