The next wave in search technology will be driven by the identification, extraction, and exploitation of real-world entities represented in unstructured textual sources. Search systems will either let users express information needs naturally and analyze them more intelligently, or allow simple enhancements that add more user control on the search process. The data model will exploit graph structure where available, but not impose structure by fiat. First generation Web search, which uses graph information at the macroscopic level of inter-page hyperlinks, will be enhanced to use fine-grained graph models involving page regions, tables, sentences, phrases, and real-world-entities. New algorithms will combine probabilistic evidence from diverse features to produce responses that are not URLs or pages, but entities and their relationships, or explanations of how multiple entities are related.
* Argues for using graphs of parts of documents and entities instead of graphs of documents (as done nowadays).
* Explores IE from queries and from corpus.
* "The system may need to actively explore the corpus to propose new types and extract entities for old and new types. Eventually, we would like the system to learn how to learn."
%0 Book Section
%1 Chakrabarti2004
%A Chakrabarti, Soumen
%D 2004
%B Knowledge Discovery in Databases: PKDD 2004
%K entity graph-db information_extraction search
%P 9--16
%T Breaking Through the Syntax Barrier: Searching with Entities and Relations
%U http://www.springerlink.com/content/nrlylp2ht78ethh8
%X The next wave in search technology will be driven by the identification, extraction, and exploitation of real-world entities represented in unstructured textual sources. Search systems will either let users express information needs naturally and analyze them more intelligently, or allow simple enhancements that add more user control on the search process. The data model will exploit graph structure where available, but not impose structure by fiat. First generation Web search, which uses graph information at the macroscopic level of inter-page hyperlinks, will be enhanced to use fine-grained graph models involving page regions, tables, sentences, phrases, and real-world-entities. New algorithms will combine probabilistic evidence from diverse features to produce responses that are not URLs or pages, but entities and their relationships, or explanations of how multiple entities are related.
% Chakrabarti2004 -- book chapter (incollection entry) from the PKDD 2004 volume.
% The title of the containing volume is stored in booktitle, which is the field
% the incollection type reads; a journal field is ignored for this entry type.
% NOTE(review): the publisher value is inferred from the springerlink.com URL --
% confirm against the printed volume.
% The non-standard fields (added-at, biburl, citeulike-article-id, comment,
% interhash, intrahash, keywords, posted-at, priority, timestamp) are ignored by
% the standard styles and are kept with their original values.
@incollection{Chakrabarti2004,
  author               = {Chakrabarti, Soumen},
  title                = {Breaking Through the Syntax Barrier: Searching with Entities and Relations},
  booktitle            = {Knowledge Discovery in Databases: {PKDD} 2004},
  publisher            = {Springer},
  pages                = {9--16},
  year                 = {2004},
  url                  = {http://www.springerlink.com/content/nrlylp2ht78ethh8},
  abstract             = {The next wave in search technology will be driven by the identification, extraction, and exploitation of real-world entities represented in unstructured textual sources. Search systems will either let users express information needs naturally and analyze them more intelligently, or allow simple enhancements that add more user control on the search process. The data model will exploit graph structure where available, but not impose structure by fiat. First generation Web search, which uses graph information at the macroscopic level of inter-page hyperlinks, will be enhanced to use fine-grained graph models involving page regions, tables, sentences, phrases, and real-world-entities. New algorithms will combine probabilistic evidence from diverse features to produce responses that are not URLs or pages, but entities and their relationships, or explanations of how multiple entities are related.},
  keywords             = {entity graph-db information_extraction search},
  comment              = {* Argues for using graphs of parts of documents and entities instead of graphs of documents (as done nowadays).
* Explores IE from queries and from corpus.
* "The system may need to actively explore the corpus to propose new types and extract entities for old and new types. Eventually, we would like the system to learn how to learn."},
  added-at             = {2009-03-12T15:42:50.000+0100},
  biburl               = {https://www.bibsonomy.org/bibtex/266806e53cc3bde73f84599e5b37ae868/lillejul},
  citeulike-article-id = {2696959},
  interhash            = {4f66125b24904e129f95efd8321baec1},
  intrahash            = {66806e53cc3bde73f84599e5b37ae868},
  posted-at            = {2008-04-21 16:43:03},
  priority             = {0},
  timestamp            = {2009-04-22T10:29:38.000+0200},
}