The result cache is a vital component for efficiency of large-scale web search engines, and maintaining the freshness of cached query results is the current research challenge. As a remedy to this problem, our work proposes a new mechanism to identify queries whose cached results are stale. The basic idea behind our mechanism is to maintain and compare generation time of query results with update times of posting lists and documents to decide on staleness of query results. The proposed technique is evaluated using a Wikipedia document collection with real update information and a real-life query log. We show that our technique has good prediction accuracy, relative to a baseline based on the time-to-live mechanism. Moreover, it is easy to implement and incurs less processing overhead on the system relative to a recently proposed, more sophisticated invalidation mechanism.
%0 Conference Paper
%1 L3S_989248eba0e79742f317b409c23a93af567209d1
%A Alici, Sadiye
%A Altingövde, Ismail Sengor
%A Ozcan, Rifat
%A Cambazoglu, B. Barla
%A Ulusoy, Özgür
%B Proc. of the 34th International ACM SIGIR Conference on Research and Development in Information Retrieval, SIGIR 2011
%P 973-982
%D 2011
%K Nejdl publication
%T Timestamp-based result cache invalidation for web search engines
%X The result cache is a vital component for efficiency of large-scale web search engines, and maintaining the freshness of cached query results is the current research challenge. As a remedy to this problem, our work proposes a new mechanism to identify queries whose cached results are stale. The basic idea behind our mechanism is to maintain and compare generation time of query results with update times of posting lists and documents to decide on staleness of query results. The proposed technique is evaluated using a Wikipedia document collection with real update information and a real-life query log. We show that our technique has good prediction accuracy, relative to a baseline based on the time-to-live mechanism. Moreover, it is easy to implement and incurs less processing overhead on the system relative to a recently proposed, more sophisticated invalidation mechanism.
@inproceedings{L3S_989248eba0e79742f317b409c23a93af567209d1,
  author    = {Alici, Sadiye and Altingövde, Ismail Sengor and Ozcan, Rifat and Cambazoglu, B. Barla and Ulusoy, Özgür},
  title     = {Timestamp-based result cache invalidation for web search engines},
  booktitle = {Proc. of the 34th International {ACM} {SIGIR} Conference on Research and Development in Information Retrieval, {SIGIR} 2011},
  pages     = {973--982},
  year      = 2011,
  abstract  = {The result cache is a vital component for efficiency of large-scale web search engines, and maintaining the freshness of cached query results is the current research challenge. As a remedy to this problem, our work proposes a new mechanism to identify queries whose cached results are stale. The basic idea behind our mechanism is to maintain and compare generation time of query results with update times of posting lists and documents to decide on staleness of query results. The proposed technique is evaluated using a Wikipedia document collection with real update information and a real-life query log. We show that our technique has good prediction accuracy, relative to a baseline based on the time-to-live mechanism. Moreover, it is easy to implement and incurs less processing overhead on the system relative to a recently proposed, more sophisticated invalidation mechanism.},
  keywords  = {Nejdl publication},
  added-at  = {2012-06-15T13:14:42.000+0200},
  timestamp = {2012-06-15T13:14:50.000+0200},
  biburl    = {https://www.bibsonomy.org/bibtex/2732b81dd7ca0a5db1ec6ffae74611909/l3s},
  interhash = {335123e12b2fb59814624c55f14fbe2b},
  intrahash = {732b81dd7ca0a5db1ec6ffae74611909},
}