A key issue in designing a focused Web crawler is how to determine whether an unvisited URL is relevant to the search topic. Effective relevance prediction can help avoid downloading and visiting many irrelevant pages. In this paper, we propose a new learning-based approach to improve relevance prediction in focused Web crawlers. For this study, we chose Naïve Bayesian as the base prediction model, which however can be easily switched to a different prediction model. Experimental result shows that our approach is valid and more efficient than related approaches.
Description
Improving Relevance Prediction for Focused Web Crawlers | IEEE Conference Publication | IEEE Xplore
%0 Conference Paper
%1 6211091
%A Safran, Mejdl S.
%A Althagafi, Abdullah
%A Che, Dunren
%B 2012 IEEE/ACIS 11th International Conference on Computer and Information Science
%D 2012
%K crawler relevance unkowndata web
%P 161-166
%R 10.1109/ICIS.2012.61
%T Improving Relevance Prediction for Focused Web Crawlers
%U https://ieeexplore.ieee.org/abstract/document/6211091
%X A key issue in designing a focused Web crawler is how to determine whether an unvisited URL is relevant to the search topic. Effective relevance prediction can help avoid downloading and visiting many irrelevant pages. In this paper, we propose a new learning-based approach to improve relevance prediction in focused Web crawlers. For this study, we chose Naïve Bayesian as the base prediction model, which however can be easily switched to a different prediction model. Experimental result shows that our approach is valid and more efficient than related approaches.
% Conference paper (IEEE/ACIS ICIS 2012); record exported from BibSonomy (see biburl).
% The page range uses the BibTeX double-hyphen form so styles typeset an en-dash.
@inproceedings{6211091,
  author      = {Safran, Mejdl S. and Althagafi, Abdullah and Che, Dunren},
  title       = {Improving Relevance Prediction for Focused Web Crawlers},
  booktitle   = {2012 IEEE/ACIS 11th International Conference on Computer and Information Science},
  year        = {2012},
  month       = may,
  pages       = {161--166},
  doi         = {10.1109/ICIS.2012.61},
  url         = {https://ieeexplore.ieee.org/abstract/document/6211091},
  abstract    = {A key issue in designing a focused Web crawler is how to determine whether an unvisited URL is relevant to the search topic. Effective relevance prediction can help avoid downloading and visiting many irrelevant pages. In this paper, we propose a new learning-based approach to improve relevance prediction in focused Web crawlers. For this study, we chose Naïve Bayesian as the base prediction model, which however can be easily switched to a different prediction model. Experimental result shows that our approach is valid and more efficient than related approaches.},
  keywords    = {crawler relevance unkowndata web},
  description = {Improving Relevance Prediction for Focused Web Crawlers | IEEE Conference Publication | IEEE Xplore},
  added-at    = {2021-09-12T12:33:40.000+0200},
  timestamp   = {2021-09-12T12:33:40.000+0200},
  biburl      = {https://www.bibsonomy.org/bibtex/225c59be247efc9c84ec6e1e0eb2f4e3c/parismic},
  interhash   = {16887d9175a49259b4a8a848bfaa106d},
  intrahash   = {25c59be247efc9c84ec6e1e0eb2f4e3c},
}