Named entity disambiguation has become an important research area providing the basis for improving search engine precision and for enabling semantic search. Current approaches for the named entity disambiguation are usually based on exploiting structured semantic and lingual resources (e.g. WordNet, DBpedia). Unfortunately, each of these resources cover independently from each other insufficient information for the task of named entity disambiguation. On the one hand WordNet comprises a relative small number of named entities while on the other hand DBpedia provides only little context for named entities. Our approach is based on the use of multi-lingual Wikipedia data. We show how the combination of multi-lingual resources can be used for named entity disambiguation. Based on a German and an English document corpus, we evaluate various similarity measures and algorithms for extracting data for named entity disambiguation. We show that the intelligent filtering of context data and the combination of multi-lingual information provides high quality named entity disambiguation results.
%0 Conference Paper
%1 ir3
%A Lommatzsch, Andreas
%A Ploch, Danuta
%A De Luca, Ernesto William
%A Albayrak, Sahin
%B Proceedings of LWA2010 - Workshop-Woche: Lernen, Wissen & Adaptivitaet
%C Kassel, Germany
%D 2010
%E Atzmüller, Martin
%E Benz, Dominik
%E Hotho, Andreas
%E Stumme, Gerd
%K disambiguation entity extraction information mining multilingual named room:-1418 session:ir3 system text wikipedia workshop:ir
%T Named Entity Disambiguation for German News Articles
%U http://www.kde.cs.uni-kassel.de/conf/lwa10/papers/ir3.pdf
%X Named entity disambiguation has become an important research area providing the basis for improving search engine precision and for enabling semantic search. Current approaches for the named entity disambiguation are usually based on exploiting structured semantic and lingual resources (e.g. WordNet, DBpedia). Unfortunately, each of these resources cover independently from each other insufficient information for the task of named entity disambiguation. On the one hand WordNet comprises a relative small number of named entities while on the other hand DBpedia provides only little context for named entities. Our approach is based on the use of multi-lingual Wikipedia data. We show how the combination of multi-lingual resources can be used for named entity disambiguation. Based on a German and an English document corpus, we evaluate various similarity measures and algorithms for extracting data for named entity disambiguation. We show that the intelligent filtering of context data and the combination of multi-lingual information provides high quality named entity disambiguation results.
@comment{ir3: conference paper entry; the biburl field points to the bibsonomy.org record for this paper. Besides the standard fields it carries scheduling fields (presentation_start, presentation_end, room, session, track) that standard styles ignore. Author names are in "Last, First" form; case-sensitive words and accented letters are brace-protected.}
@inproceedings{ir3,
abstract = {Named entity disambiguation has become an important research area providing the basis for improving search engine precision and for enabling semantic search. Current approaches for the named entity disambiguation are usually based on exploiting structured semantic and lingual resources (e.g. WordNet, DBpedia). Unfortunately, each of these resources cover independently from each other insufficient information for the task of named entity disambiguation. On the one hand WordNet comprises a relative small number of named entities while on the other hand DBpedia provides only little context for named entities. Our approach is based on the use of multi-lingual Wikipedia data. We show how the combination of multi-lingual resources can be used for named entity disambiguation. Based on a German and an English document corpus, we evaluate various similarity measures and algorithms for extracting data for named entity disambiguation. We show that the intelligent filtering of context data and the combination of multi-lingual information provides high quality named entity disambiguation results.},
added-at = {2010-10-05T14:15:12.000+0200},
address = {Kassel, Germany},
author = {Lommatzsch, Andreas and Ploch, Danuta and De Luca, Ernesto William and Albayrak, Sahin},
biburl = {https://www.bibsonomy.org/bibtex/2ef033b004e2588678a381af288797d86/lwa2010},
booktitle = {Proceedings of LWA2010 - Workshop-Woche: Lernen, Wissen {\&} Adaptivitaet},
crossref = {lwa2010},
editor = {Atzm{\"u}ller, Martin and Benz, Dominik and Hotho, Andreas and Stumme, Gerd},
interhash = {9678fb8a2d3a63d41e649d6edf81e04b},
intrahash = {ef033b004e2588678a381af288797d86},
keywords = {disambiguation entity extraction information mining multilingual named room:-1418 session:ir3 system text wikipedia workshop:ir},
presentation_end = {2010-10-06 09:30:00},
presentation_start = {2010-10-06 09:00:00},
room = {-1418},
session = {ir3},
timestamp = {2010-10-05T14:15:13.000+0200},
title = {Named Entity Disambiguation for {German} News Articles},
track = {ir},
url = {http://www.kde.cs.uni-kassel.de/conf/lwa10/papers/ir3.pdf},
year = 2010
}