The goal of the work presented in this paper is to obtain large amounts of semistructured data from the web. Harvesting semistructured data is a prerequisite to enabling large-scale query answering over web sources.
We contrast our approach to conventional web crawlers, and describe and evaluate a five-step pipelined architecture to crawl and index data from both the traditional and the Semantic Web.
%0 Journal Article
%1 keyhere
%A Harth, Andreas
%A Umbrich, Jürgen
%A Decker, Stefan
%D 2006
%J The Semantic Web - ISWC 2006
%K browser diplomarbeit semanticweb
%P 258--271
%T MultiCrawler: A Pipelined Architecture for Crawling and Indexing Semantic Web Data
%U http://dx.doi.org/10.1007/11926078_19
%X The goal of the work presented in this paper is to obtain large amounts of semistructured data from the web. Harvesting semistructured data is a prerequisite to enabling large-scale query answering over web sources. We contrast our approach to conventional web crawlers, and describe and evaluate a five-step pipelined architecture to crawl and index data from both the traditional and the Semantic Web.
% Conference paper published in the ISWC 2006 proceedings volume (the
% description field below records it as a SpringerLink book chapter), so it is
% typed as inproceedings with the volume title in booktitle rather than journal.
% The added-at, biburl, interhash, intrahash and timestamp fields are
% BibSonomy bookkeeping; standard styles ignore them.
@inproceedings{keyhere,
  author      = {Harth, Andreas and Umbrich, J{\"u}rgen and Decker, Stefan},
  title       = {{MultiCrawler}: A Pipelined Architecture for Crawling and Indexing {Semantic Web} Data},
  booktitle   = {The Semantic Web -- {ISWC} 2006},
  pages       = {258--271},
  year        = {2006},
  doi         = {10.1007/11926078_19},
  url         = {http://dx.doi.org/10.1007/11926078_19},
  abstract    = {The goal of the work presented in this paper is to obtain large amounts of semistructured data from the web. Harvesting semistructured data is a prerequisite to enabling large-scale query answering over web sources. We contrast our approach to conventional web crawlers, and describe and evaluate a five-step pipelined architecture to crawl and index data from both the traditional and the Semantic Web.},
  keywords    = {browser diplomarbeit semanticweb},
  description = {SpringerLink - Book Chapter},
  added-at    = {2008-12-04T15:43:03.000+0100},
  timestamp   = {2010-12-09T12:52:02.000+0100},
  biburl      = {https://www.bibsonomy.org/bibtex/21b864c53dd91dbaadba110f668948cdb/dominikb1888},
  interhash   = {1514f2d043db58018af8e9858ba08f50},
  intrahash   = {1b864c53dd91dbaadba110f668948cdb},
}