D. Milne and I. Witten. Proceedings of the 17th ACM Conference on Information and Knowledge Management, pages 509--518. New York, NY, USA, ACM, (2008)
DOI: 10.1145/1458082.1458150
Abstract
This paper describes how to automatically cross-reference documents with Wikipedia: the largest knowledge base ever known. It explains how machine learning can be used to identify significant terms within unstructured text, and enrich it with links to the appropriate Wikipedia articles. The resulting link detector and disambiguator performs very well, with recall and precision of almost 75%. This performance is constant whether the system is evaluated on Wikipedia articles or "real world" documents. This work has implications far beyond enriching documents with explanatory links. It can provide structured knowledge about any unstructured fragment of text. Any task that is currently addressed with bags of words - indexing, clustering, retrieval, and summarization to name a few - could use the techniques described here to draw on a vast network of concepts and semantics.
%0 Conference Paper
%1 Milne:2008:LLW:1458082.1458150
%A Milne, David
%A Witten, Ian H.
%B Proceedings of the 17th ACM Conference on Information and Knowledge Management
%C New York, NY, USA
%D 2008
%I ACM
%K disambiguation linking phdproposal wikify wikipedia
%P 509--518
%R 10.1145/1458082.1458150
%T Learning to Link with Wikipedia
%U http://doi.acm.org/10.1145/1458082.1458150
%X This paper describes how to automatically cross-reference documents with Wikipedia: the largest knowledge base ever known. It explains how machine learning can be used to identify significant terms within unstructured text, and enrich it with links to the appropriate Wikipedia articles. The resulting link detector and disambiguator performs very well, with recall and precision of almost 75%. This performance is constant whether the system is evaluated on Wikipedia articles or "real world" documents. This work has implications far beyond enriching documents with explanatory links. It can provide structured knowledge about any unstructured fragment of text. Any task that is currently addressed with bags of words - indexing, clustering, retrieval, and summarization to name a few - could use the techniques described here to draw on a vast network of concepts and semantics.
%@ 978-1-59593-991-3
@inproceedings{Milne:2008:LLW:1458082.1458150,
  abstract = {This paper describes how to automatically cross-reference documents with Wikipedia: the largest knowledge base ever known. It explains how machine learning can be used to identify significant terms within unstructured text, and enrich it with links to the appropriate Wikipedia articles. The resulting link detector and disambiguator performs very well, with recall and precision of almost 75\%. This performance is constant whether the system is evaluated on Wikipedia articles or ``real world'' documents. This work has implications far beyond enriching documents with explanatory links. It can provide structured knowledge about any unstructured fragment of text. Any task that is currently addressed with bags of words - indexing, clustering, retrieval, and summarization to name a few - could use the techniques described here to draw on a vast network of concepts and semantics.},
  acmid = {1458150},
  added-at = {2015-01-06T15:19:35.000+0100},
  address = {New York, NY, USA},
  author = {Milne, David and Witten, Ian H.},
  biburl = {https://www.bibsonomy.org/bibtex/283ce81ff43433b4433a776c07411b664/asmelash},
  booktitle = {Proceedings of the 17th {ACM} Conference on Information and Knowledge Management},
  description = {Learning to link with wikipedia},
  doi = {10.1145/1458082.1458150},
  interhash = {44159e289485110212602792e72bbd74},
  intrahash = {83ce81ff43433b4433a776c07411b664},
  isbn = {978-1-59593-991-3},
  keywords = {disambiguation linking phdproposal wikify wikipedia},
  location = {Napa Valley, California, USA},
  numpages = {10},
  pages = {509--518},
  publisher = {ACM},
  series = {CIKM '08},
  timestamp = {2015-01-06T15:39:30.000+0100},
  title = {Learning to Link with {Wikipedia}},
  url = {http://doi.acm.org/10.1145/1458082.1458150},
  year = {2008}
}