Are click-through data adequate for learning web search rankings?
Z. Dou, R. Song, X. Yuan, and J. Wen. CIKM '08: Proceedings of the 17th ACM conference on Information and knowledge management, pp. 73--82. New York, NY, USA, ACM, (2008)
DOI: 10.1145/1458082.1458095
Abstract
Learning-to-rank algorithms, which can automatically adapt ranking functions in web search, require a large volume of training data. A traditional way of generating training examples is to employ human experts to judge the relevance of documents. Unfortunately, it is difficult, time-consuming and costly. In this paper, we study the problem of exploiting click-through data for learning web search rankings that can be collected at much lower cost. We extract pairwise relevance preferences from a large-scale aggregated click-through dataset, compare these preferences with explicit human judgments, and use them as training examples to learn ranking functions. We find click-through data are useful and effective in learning ranking functions. A straightforward use of aggregated click-through data can outperform human judgments. We demonstrate that the strategies are only slightly affected by fraudulent clicks. We also reveal that the pairs which are very reliable, e.g., the pairs consisting of documents with large click frequency differences, are not sufficient for learning.
%0 Conference Paper
%1 dou2008clickthrough
%A Dou, Zhicheng
%A Song, Ruihua
%A Yuan, Xiaojie
%A Wen, Ji-Rong
%B CIKM '08: Proceedings of the 17th ACM conference on Information and knowledge management
%C New York, NY, USA
%D 2008
%I ACM
%K click learning-to-rank ranking search web
%P 73--82
%R 10.1145/1458082.1458095
%T Are click-through data adequate for learning web search rankings?
%U http://portal.acm.org/citation.cfm?id=1458095
%X Learning-to-rank algorithms, which can automatically adapt ranking functions in web search, require a large volume of training data. A traditional way of generating training examples is to employ human experts to judge the relevance of documents. Unfortunately, it is difficult, time-consuming and costly. In this paper, we study the problem of exploiting click-through data for learning web search rankings that can be collected at much lower cost. We extract pairwise relevance preferences from a large-scale aggregated click-through dataset, compare these preferences with explicit human judgments, and use them as training examples to learn ranking functions. We find click-through data are useful and effective in learning ranking functions. A straightforward use of aggregated click-through data can outperform human judgments. We demonstrate that the strategies are only slightly affected by fraudulent clicks. We also reveal that the pairs which are very reliable, e.g., the pairs consisting of documents with large click frequency differences, are not sufficient for learning.
%@ 978-1-59593-991-3
@comment{Conference paper record as exported from BibSonomy (see biburl). The fields added-at, biburl, interhash, intrahash and timestamp are BibSonomy bookkeeping; standard bibliography styles ignore unknown fields.}
@inproceedings{dou2008clickthrough,
  author    = {Dou, Zhicheng and Song, Ruihua and Yuan, Xiaojie and Wen, Ji-Rong},
  title     = {Are click-through data adequate for learning web search rankings?},
  booktitle = {{CIKM} '08: Proceedings of the 17th {ACM} Conference on Information and Knowledge Management},
  pages     = {73--82},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Napa Valley, California, USA},
  year      = {2008},
  doi       = {10.1145/1458082.1458095},
  isbn      = {978-1-59593-991-3},
  url       = {http://portal.acm.org/citation.cfm?id=1458095},
  keywords  = {click learning-to-rank ranking search web},
  abstract  = {Learning-to-rank algorithms, which can automatically adapt ranking functions in web search, require a large volume of training data. A traditional way of generating training examples is to employ human experts to judge the relevance of documents. Unfortunately, it is difficult, time-consuming and costly. In this paper, we study the problem of exploiting click-through data for learning web search rankings that can be collected at much lower cost. We extract pairwise relevance preferences from a large-scale aggregated click-through dataset, compare these preferences with explicit human judgments, and use them as training examples to learn ranking functions. We find click-through data are useful and effective in learning ranking functions. A straightforward use of aggregated click-through data can outperform human judgments. We demonstrate that the strategies are only slightly affected by fraudulent clicks. We also reveal that the pairs which are very reliable, e.g., the pairs consisting of documents with large click frequency differences, are not sufficient for learning.},
  added-at  = {2010-08-05T09:22:53.000+0200},
  timestamp = {2014-07-28T15:57:31.000+0200},
  biburl    = {https://www.bibsonomy.org/bibtex/25febcfb066de4ae1159bd8245a70fbc8/jaeschke},
  interhash = {1265bf68c0ea0ff5c2b4ee6c4a309378},
  intrahash = {5febcfb066de4ae1159bd8245a70fbc8},
}