Many systems for tasks such as question answering, multi-document summarization, and information retrieval need robust numerical measures of lexical relatedness. Standard thesaurus-based measures of word pair similarity are based on only a single path between those words in the thesaurus graph. By contrast, we propose a new model of lexical semantic relatedness that incorporates information from every explicit or implicit path connecting the two words in the entire graph. Our model uses a random walk over nodes and edges derived from WordNet links and corpus statistics. We treat the graph as a Markov chain and compute a word-specific stationary distribution via a generalized PageRank algorithm. Semantic relatedness of a word pair is scored by a novel divergence measure, ZKL, that outperforms existing measures on certain classes of distributions. In our experiments, the resulting relatedness measure is the WordNet-based measure most highly correlated with human similarity judgments by rank ordering at ρ = .90.
%0 Conference Paper
%1 hughes2007lexical
%A Hughes, Thad
%A Ramage, Daniel
%B EMNLP-CoNLL
%D 2007
%K random relatedness semantic walk
%P 581--589
%T Lexical Semantic Relatedness with Random Graph Walks
%U http://nlp.stanford.edu/dramage/papers/lexical-emnlp07.pdf
%X Many systems for tasks such as question answering, multi-document summarization, and information retrieval need robust numerical measures of lexical relatedness. Standard thesaurus-based measures of word pair similarity are based on only a single path between those words in the thesaurus graph. By contrast, we propose a new model of lexical semantic relatedness that incorporates information from every explicit or implicit path connecting the two words in the entire graph. Our model uses a random walk over nodes and edges derived from WordNet links and corpus statistics. We treat the graph as a Markov chain and compute a word-specific stationary distribution via a generalized PageRank algorithm. Semantic relatedness of a word pair is scored by a novel divergence measure, ZKL, that outperforms existing measures on certain classes of distributions. In our experiments, the resulting relatedness measure is the WordNet-based measure most highly correlated with human similarity judgments by rank ordering at ρ = .90.
@comment{Conference paper by Hughes and Ramage, 2007 (EMNLP-CoNLL), pages 581--589. Standard fields come first; added-at, biburl, interhash, intrahash, keywords and timestamp are non-standard export metadata (biburl points at bibsonomy.org) that standard styles ignore.}
@inproceedings{hughes2007lexical,
  author    = {Hughes, Thad and Ramage, Daniel},
  title     = {Lexical Semantic Relatedness with Random Graph Walks},
  booktitle = {EMNLP-CoNLL},
  year      = {2007},
  pages     = {581--589},
  url       = {http://nlp.stanford.edu/dramage/papers/lexical-emnlp07.pdf},
  abstract  = {Many systems for tasks such as question answering, multi-document summarization, and information retrieval need robust numerical measures of lexical relatedness. Standard thesaurus-based measures of word pair similarity are based on only a single path between those words in the thesaurus graph. By contrast, we propose a new model of lexical semantic relatedness that incorporates information from every explicit or implicit path connecting the two words in the entire graph. Our model uses a random walk over nodes and edges derived from WordNet links and corpus statistics. We treat the graph as a Markov chain and compute a word-specific stationary distribution via a generalized PageRank algorithm. Semantic relatedness of a word pair is scored by a novel divergence measure, ZKL, that outperforms existing measures on certain classes of distributions. In our experiments, the resulting relatedness measure is the WordNet-based measure most highly correlated with human similarity judgments by rank ordering at ρ = .90.},
  keywords  = {random relatedness semantic walk},
  added-at  = {2016-07-14T20:47:17.000+0200},
  timestamp = {2016-11-02T06:50:19.000+0100},
  biburl    = {https://www.bibsonomy.org/bibtex/2ab5ce7ed444b5aa062027691b059b749/thoni},
  interhash = {30f73b0045c291c870a2c5760ff1fec9},
  intrahash = {ab5ce7ed444b5aa062027691b059b749},
}