While Web archive quality is endangered by Web spam, a side effect
of the high commercial value of top-ranked search-engine results,
so far Web spam filtering technologies are rarely used by Web
archivists. In this paper we make the first attempt to disseminate
existing methodology and envision a solution for Web archives to
share knowledge and unite efforts in Web spam hunting. We survey
the state of the art in Web spam filtering illustrated by the recent
Web spam challenge data sets and techniques and describe the filtering
solution for archives envisioned in the LiWA—Living Web
Archives project.
%0 Conference Paper
%1 benczur2008survey
%A Benczúr, András A.
%A Siklósi, Dávid
%A Szabó, Jácint
%A Bíró, István
%A Fekete, Zsolt
%A Kurucz, Miklós
%A Pereszlényi, Attila
%A Rácz, Simon
%A Szabó, Adrienn
%B Proceedings of the 8th International Web Archiving Workshop IWAW'08
%C Aarhus, Denmark
%D 2008
%K archive spam survey web alexandria
%T Web Spam: a Survey with Vision for the Archivist
%U http://iwaw.europarchive.org/08/IWAW2008-Benczur.pdf
%X While Web archive quality is endangered by Web spam, a side effect
of the high commercial value of top-ranked search-engine results,
so far Web spam filtering technologies are rarely used by Web
archivists. In this paper we make the first attempt to disseminate
existing methodology and envision a solution for Web archives to
share knowledge and unite efforts in Web spam hunting. We survey
the state of the art in Web spam filtering illustrated by the recent
Web spam challenge data sets and techniques and describe the filtering
solution for archives envisioned in the LiWA—Living Web
Archives project.
@inproceedings{benczur2008survey,
abstract = {While Web archive quality is endangered by Web spam, a side effect
of the high commercial value of top-ranked search-engine results,
so far Web spam filtering technologies are rarely used by Web
archivists. In this paper we make the first attempt to disseminate
existing methodology and envision a solution for Web archives to
share knowledge and unite efforts in Web spam hunting. We survey
the state of the art in Web spam filtering illustrated by the recent
Web spam challenge data sets and techniques and describe the filtering
solution for archives envisioned in the LiWA—Living Web
Archives project.},
added-at = {2012-09-06T14:29:19.000+0200},
address = {Aarhus, Denmark},
author = {Benczúr, András A. and Siklósi, Dávid and Szabó, Jácint and Bíró, István and Fekete, Zsolt and Kurucz, Miklós and Pereszlényi, Attila and Rácz, Simon and Szabó, Adrienn},
biburl = {https://www.bibsonomy.org/bibtex/2911a912a75e50451923522223f7717e8/jaeschke},
booktitle = {Proceedings of the 8th International Web Archiving Workshop IWAW'08},
interhash = {b09d09a4d29ba2a80a5a29b9a76ed5f0},
intrahash = {911a912a75e50451923522223f7717e8},
keywords = {archive spam survey web alexandria},
month = sep,
timestamp = {2014-07-28T15:57:31.000+0200},
title = {Web Spam: a Survey with Vision for the Archivist},
url = {http://iwaw.europarchive.org/08/IWAW2008-Benczur.pdf},
year = 2008
}