"Deep Web" refers to the rich information and data hidden in backend databases, etc., that search engines or Web crawlers cannot access. It is mostly accessible through manual query interfaces. This paper introduces the Semantic Deep Web, utilizing an ontology to determine relevance of query interface attributes to access the Deep Web. In addition, we present a novel approach to automatically extracting attributes from query interfaces in order to address the current limitations in accessing Deep Web data sources. Our Automatic Attribute Extraction method (1) identifies attributes that are used by query Web page designers, called Programmer Viewpoint Attributes, and (2) attributes that are presented as labels to users, called User Viewpoint Attributes. An ontology enriches the candidate query attributes by providing synonyms and by supporting the attributes used by designers and users. Our experimental results in several e-commerce domains show that the attributes obtained by our algorithm compare favorably with manually determined attributes to be used for Deep Web queries.
%0 Conference Paper
%1 An_2007
%A An, Yoo Jung
%A Geller, James
%A Wu, Yi-Ta
%A Chun, Soon Ae
%B SAC '07: Proceedings of the 2007 ACM symposium on Applied computing
%C New York, NY
%D 2007
%I ACM
%K ontologies queries search semantic_deep_web web2.0
%P 1667--1672
%R http://doi.acm.org/10.1145/1244002.1244355
%T Semantic deep web: automatic attribute extraction from the deep web data sources
%U http://portal.acm.org/citation.cfm?doid=1244002.1244355#
%X "Deep Web" refers to the rich information and data hidden in backend databases, etc., that search engines or Web crawlers cannot access. It is mostly accessible through manual query interfaces. This paper introduces the Semantic Deep Web, utilizing an ontology to determine relevance of query interface attributes to access the Deep Web. In addition, we present a novel approach to automatically extracting attributes from query interfaces in order to address the current limitations in accessing Deep Web data sources. Our Automatic Attribute Extraction method (1) identifies attributes that are used by query Web page designers, called Programmer Viewpoint Attributes, and (2) attributes that are presented as labels to users, called User Viewpoint Attributes. An ontology enriches the candidate query attributes by providing synonyms and by supporting the attributes used by designers and users. Our experimental results in several e-commerce domains show that the attributes obtained by our algorithm compare favorably with manually determined attributes to be used for Deep Web queries.
%@ 1-59593-480-4
@comment{An_2007: the doi field holds the bare DOI (no resolver prefix, styles add their own); the title is entered in Title Case with the term Deep Web brace-protected so sentence-casing styles keep its capitals.}
@inproceedings{An_2007,
abstract = {"Deep Web" refers to the rich information and data hidden in backend databases, etc., that search engines or Web crawlers cannot access. It is mostly accessible through manual query interfaces. This paper introduces the Semantic Deep Web, utilizing an ontology to determine relevance of query interface attributes to access the Deep Web. In addition, we present a novel approach to automatically extracting attributes from query interfaces in order to address the current limitations in accessing Deep Web data sources. Our Automatic Attribute Extraction method (1) identifies attributes that are used by query Web page designers, called Programmer Viewpoint Attributes, and (2) attributes that are presented as labels to users, called User Viewpoint Attributes. An ontology enriches the candidate query attributes by providing synonyms and by supporting the attributes used by designers and users. Our experimental results in several e-commerce domains show that the attributes obtained by our algorithm compare favorably with manually determined attributes to be used for Deep Web queries.},
added-at = {2008-11-04T11:47:38.000+0100},
address = {New York, NY},
author = {An, Yoo Jung and Geller, James and Wu, Yi-Ta and Chun, Soon Ae},
biburl = {https://www.bibsonomy.org/bibtex/2ccad0a337d878ca497b14364397a4dd8/else_project},
booktitle = {SAC '07: Proceedings of the 2007 ACM symposium on Applied computing},
description = {Semantic deep web},
doi = {10.1145/1244002.1244355},
interhash = {08a35f17dbd0befdbdc573d80b59e894},
intrahash = {ccad0a337d878ca497b14364397a4dd8},
isbn = {1-59593-480-4},
keywords = {ontologies queries search semantic_deep_web web2.0},
location = {Seoul, Korea},
pages = {1667--1672},
publisher = {ACM},
timestamp = {2008-11-04T11:47:38.000+0100},
title = {Semantic {Deep} {Web}: Automatic Attribute Extraction from the {Deep} {Web} Data Sources},
url = {http://portal.acm.org/citation.cfm?doid=1244002.1244355#},
year = 2007
}