Combating Web spam has become one of the top challenges for Web search engines. State-of-the-art spam-detection techniques are usually designed for specific, known types of Web spam and are incapable of dealing with newly appearing spam types efficiently. With user-behavior analyses from Web access logs, a spam page-detection algorithm is proposed based on a learning scheme. The main contributions are the following. (1) User-visiting patterns of spam pages are studied, and a number of user-behavior features are proposed for separating Web spam pages from ordinary pages. (2) A novel spam-detection framework is proposed that can detect various kinds of Web spam, including newly appearing ones, with the help of the user-behavior analysis. Experiments on large-scale practical Web access log data show the effectiveness of the proposed features and the detection framework.
%0 Journal Article
%1 citeulike:10560379
%A Liu, Yiqun
%A Chen, Fei
%A Kong, Weize
%A Yu, Huijia
%A Zhang, Min
%A Ma, Shaoping
%A Ru, Liyun
%C New York, NY, USA
%D 2012
%I ACM
%J ACM Transactions on the Web
%K crowdsourcing social-web spam
%N 1
%R 10.1145/2109205.2109207
%T Identifying Web Spam with the Wisdom of the Crowds
%U http://dx.doi.org/10.1145/2109205.2109207
%V 6
%X Combating Web spam has become one of the top challenges for Web search engines. State-of-the-art spam-detection techniques are usually designed for specific, known types of Web spam and are incapable of dealing with newly appearing spam types efficiently. With user-behavior analyses from Web access logs, a spam page-detection algorithm is proposed based on a learning scheme. The main contributions are the following. (1) User-visiting patterns of spam pages are studied, and a number of user-behavior features are proposed for separating Web spam pages from ordinary pages. (2) A novel spam-detection framework is proposed that can detect various kinds of Web spam, including newly appearing ones, with the help of the user-behavior analysis. Experiments on large-scale practical Web access log data show the effectiveness of the proposed features and the detection framework.
@comment{
  Journal article record exported from CiteULike/BibSonomy.
  The title protects only the proper noun "Web" so bibliography styles can
  still apply their own casing; the journal is stored under its full name.
  Fields other than the standard article fields are export bookkeeping
  that standard styles ignore.
}
@article{citeulike:10560379,
  author               = {Liu, Yiqun and Chen, Fei and Kong, Weize and Yu, Huijia and Zhang, Min and Ma, Shaoping and Ru, Liyun},
  title                = {Identifying {Web} Spam with the Wisdom of the Crowds},
  journal              = {ACM Transactions on the Web},
  volume               = {6},
  number               = {1},
  month                = mar,
  year                 = {2012},
  publisher            = {ACM},
  address              = {New York, NY, USA},
  issn                 = {1559-1131},
  doi                  = {10.1145/2109205.2109207},
  url                  = {http://dx.doi.org/10.1145/2109205.2109207},
  keywords             = {crowdsourcing social-web spam},
  abstract             = {Combating Web spam has become one of the top challenges for Web search engines. State-of-the-art spam-detection techniques are usually designed for specific, known types of Web spam and are incapable of dealing with newly appearing spam types efficiently. With user-behavior analyses from Web access logs, a spam page-detection algorithm is proposed based on a learning scheme. The main contributions are the following. (1) User-visiting patterns of spam pages are studied, and a number of user-behavior features are proposed for separating Web spam pages from ordinary pages. (2) A novel spam-detection framework is proposed that can detect various kinds of Web spam, including newly appearing ones, with the help of the user-behavior analysis. Experiments on large-scale practical Web access log data show the effectiveness of the proposed features and the detection framework.},
  added-at             = {2017-11-15T17:02:25.000+0100},
  timestamp            = {2020-06-30T16:15:02.000+0200},
  posted-at            = {2012-04-12 19:07:53},
  priority             = {2},
  biburl               = {https://www.bibsonomy.org/bibtex/2cb1e6be0b8ee5bb264819e0194fc795d/brusilovsky},
  interhash            = {2267cef4bce5693b9452fffa629b871a},
  intrahash            = {cb1e6be0b8ee5bb264819e0194fc795d},
  citeulike-article-id = {10560379},
  citeulike-linkout-0  = {http://portal.acm.org/citation.cfm?id=2109207},
  citeulike-linkout-1  = {http://dx.doi.org/10.1145/2109205.2109207},
}