We have developed an efficient way to determine the syntactic similarity of files and have applied it to every document on the World Wide Web. Using this mechanism, we built a clustering of all the documents that are syntactically similar. Possible applications include a "Lost and Found" service, filtering the results of Web searches, updating widely distributed web-pages, and identifying violations of intellectual property rights.
Description
ScienceDirect - Computer Networks and ISDN Systems : Syntactic clustering of the Web
%0 Journal Article
%1 keyhere
%A Broder, Andrei Z.
%A Glassman, Steven C.
%A Manasse, Mark S.
%A Zweig, Geoffrey
%B Papers from the Sixth International World Wide Web Conference
%D 1997
%J Computer Networks and ISDN Systems
%K clustering syntactic web
%N 8-13
%P 1157-1166
%T Syntactic clustering of the Web
%U http://www.sciencedirect.com/science/article/B6TYT-3SP60S4-11/2/38f44c816ec8d69b406317de1629e56d
%V 29
%X We have developed an efficient way to determine the syntactic similarity of files and have applied it to every document on the World Wide Web. Using this mechanism, we built a clustering of all the documents that are syntactically similar. Possible applications include a "Lost and Found" service, filtering the results of Web searches, updating widely distributed web-pages, and identifying violations of intellectual property rights.
@comment{
  Journal article: Computer Networks and ISDN Systems, volume 29, issues 8--13,
  September 1997. The booktitle field records the special-issue title; standard
  article styles do not print it, so it is kept only as provenance.
  added-at, biburl, interhash, intrahash and timestamp are BibSonomy export
  metadata and are ignored by bibliography styles.
}
@article{keyhere,
  author      = {Broder, Andrei Z. and Glassman, Steven C. and Manasse, Mark S. and Zweig, Geoffrey},
  title       = {Syntactic Clustering of the {Web}},
  journal     = {Computer Networks and ISDN Systems},
  booktitle   = {Papers from the Sixth International World Wide Web Conference},
  volume      = {29},
  number      = {8--13},
  pages       = {1157--1166},
  month       = sep,
  year        = {1997},
  url         = {http://www.sciencedirect.com/science/article/B6TYT-3SP60S4-11/2/38f44c816ec8d69b406317de1629e56d},
  abstract    = {We have developed an efficient way to determine the syntactic similarity of files and have applied it to every document on the World Wide Web. Using this mechanism, we built a clustering of all the documents that are syntactically similar. Possible applications include a "Lost and Found" service, filtering the results of Web searches, updating widely distributed web-pages, and identifying violations of intellectual property rights.},
  keywords    = {clustering syntactic web},
  description = {ScienceDirect - Computer Networks and ISDN Systems : Syntactic clustering of the Web},
  added-at    = {2007-11-30T20:35:01.000+0100},
  timestamp   = {2007-11-30T20:35:02.000+0100},
  biburl      = {https://www.bibsonomy.org/bibtex/293a3440b81c13ec81c17481a97719c71/wnpxrz},
  interhash   = {424cdc36335873e4d8c0bed6e07e872e},
  intrahash   = {93a3440b81c13ec81c17481a97719c71},
}