We present an algorithm, WITCH, that learns to detect spam hosts or pages on the Web. Unlike most other approaches, it simultaneously exploits the structure of the Web graph as well as page contents and features. The method is efficient, scalable, and provides state-of-the-art accuracy on a standard Web spam benchmark.
Description
Web spam identification through content and hyperlinks
%0 Conference Paper
%1 abernethy2008identification
%A Abernethy, Jacob
%A Chapelle, Olivier
%A Castillo, Carlos
%B Proceedings of the 4th International Workshop on Adversarial Information Retrieval on the Web
%C New York, NY, USA
%D 2008
%I ACM
%K content hyperlinks identification spam web
%P 41--44
%R 10.1145/1451983.1451994
%T Web Spam Identification Through Content and Hyperlinks
%U http://doi.acm.org/10.1145/1451983.1451994
%X We present an algorithm, WITCH, that learns to detect spam hosts or pages on the Web. Unlike most other approaches, it simultaneously exploits the structure of the Web graph as well as page contents and features. The method is efficient, scalable, and provides state-of-the-art accuracy on a standard Web spam benchmark.
%@ 978-1-60558-159-0
@comment{
  Workshop paper (series: AIRWeb '08).
  "Web" is brace-protected in the title so that sentence-casing styles keep
  its capital letter. The fields acmid, numpages, location and series look
  like ACM Digital Library export fields, and added-at, biburl, interhash,
  intrahash and timestamp look like BibSonomy bookkeeping (to be confirmed);
  standard styles ignore fields they do not know.
}
@inproceedings{abernethy2008identification,
  author      = {Abernethy, Jacob and Chapelle, Olivier and Castillo, Carlos},
  title       = {{Web} Spam Identification Through Content and Hyperlinks},
  booktitle   = {Proceedings of the 4th International Workshop on Adversarial Information Retrieval on the Web},
  series      = {AIRWeb '08},
  year        = {2008},
  pages       = {41--44},
  numpages    = {4},
  publisher   = {ACM},
  address     = {New York, NY, USA},
  location    = {Beijing, China},
  isbn        = {978-1-60558-159-0},
  doi         = {10.1145/1451983.1451994},
  url         = {http://doi.acm.org/10.1145/1451983.1451994},
  acmid       = {1451994},
  keywords    = {content hyperlinks identification spam web},
  abstract    = {We present an algorithm, WITCH, that learns to detect spam hosts or pages on the Web. Unlike most other approaches, it simultaneously exploits the structure of the Web graph as well as page contents and features. The method is efficient, scalable, and provides state-of-the-art accuracy on a standard Web spam benchmark.},
  description = {Web spam identification through content and hyperlinks},
  added-at    = {2018-04-11T19:24:14.000+0200},
  timestamp   = {2018-04-18T14:40:39.000+0200},
  biburl      = {https://www.bibsonomy.org/bibtex/23fd1fd56f52affb95b132390a7b37fc4/nosebrain},
  interhash   = {943c48889e8fbf17eaac7b91cba28c3f},
  intrahash   = {3fd1fd56f52affb95b132390a7b37fc4},
}