Syntactic clustering of the Web.
Approach for finding very similar docs on the web.
* Shingling approach
* Computation of digest based on shingles.
* Computation of super shingles.
* Filtering.
* Clustering in part (division into tiles and merging)
About 30,000,000 docs analyzed.
This approach is used in the paper of Vedran
%0 Journal Article
%1 283370
%A Broder, Andrei Z.
%A Glassman, Steven C.
%A Manasse, Mark S.
%A Zweig, Geoffrey
%C Amsterdam, The Netherlands
%D 1997
%I Elsevier Science Publishers B. V.
%J Comput. Netw. ISDN Syst.
%K clustering idiom similarity
%N 8-13
%P 1157--1166
%R http://dx.doi.org/10.1016/S0169-7552(97)00031-7
%T Syntactic clustering of the Web
%U http://portal.acm.org/citation.cfm?id=283370
%V 29
@article{283370,
added-at = {2008-05-21T18:32:56.000+0200},
address = {Amsterdam, The Netherlands},
author = {Broder, Andrei Z. and Glassman, Steven C. and Manasse, Mark S. and Zweig, Geoffrey},
biburl = {https://www.bibsonomy.org/bibtex/25fc2af573c43a5c37d31437ced7b5272/dzibold},
description = {Syntactic clustering of the Web.
Approach for finding very similar docs on the web.
* Shingling approach
* Computation of digest basing on shingles.
* Computation of super shingles.
* Filtering.
* Clustering in part (division into tiles and merging)
About 30.000.000 docs analyzed.
This approach is used in the paper of Vedran},
doi = {10.1016/S0169-7552(97)00031-7},
interhash = {424cdc36335873e4d8c0bed6e07e872e},
intrahash = {5fc2af573c43a5c37d31437ced7b5272},
issn = {0169-7552},
journal = {Computer Networks and {ISDN} Systems},
keywords = {clustering idiom similarity},
number = {8--13},
pages = {1157--1166},
publisher = {Elsevier Science Publishers B. V.},
timestamp = {2008-05-28T00:55:16.000+0200},
title = {Syntactic clustering of the {Web}},
url = {http://portal.acm.org/citation.cfm?id=283370},
volume = 29,
year = 1997
}