D. Ramage, A. Rafferty, and C. Manning. Proceedings of the 2009 Workshop on Graph-based Methods for Natural Language Processing, pages 23--31. Stroudsburg, PA, USA, Association for Computational Linguistics, (2009)
Abstract
Many tasks in NLP stand to benefit from robust measures of semantic similarity for units above the level of individual words. Rich semantic resources such as WordNet provide local semantic information at the lexical level. However, effectively combining this information to compute scores for phrases or sentences is an open problem. Our algorithm aggregates local relatedness information via a random walk over a graph constructed from an underlying lexical resource. The stationary distribution of the graph walk forms a "semantic signature" that can be compared to another such distribution to get a relatedness score for texts. On a paraphrase recognition task, the algorithm achieves an 18.5% relative reduction in error rate over a vector-space baseline. We also show that the graph walk similarity between texts has complementary value as a feature for recognizing textual entailment, improving on a competitive baseline system.
%0 Conference Paper
%1 Ramage:2009:RWT:1708124.1708131
%A Ramage, Daniel
%A Rafferty, Anna N.
%A Manning, Christopher D.
%B Proceedings of the 2009 Workshop on Graph-based Methods for Natural Language Processing
%C Stroudsburg, PA, USA
%D 2009
%I Association for Computational Linguistics
%K NLP semantic_signature textual_entailment
%P 23--31
%T Random walks for text semantic similarity
%U http://portal.acm.org/citation.cfm?id=1708124.1708131
%X Many tasks in NLP stand to benefit from robust measures of semantic similarity for units above the level of individual words. Rich semantic resources such as WordNet provide local semantic information at the lexical level. However, effectively combining this information to compute scores for phrases or sentences is an open problem. Our algorithm aggregates local relatedness information via a random walk over a graph constructed from an underlying lexical resource. The stationary distribution of the graph walk forms a "semantic signature" that can be compared to another such distribution to get a relatedness score for texts. On a paraphrase recognition task, the algorithm achieves an 18.5% relative reduction in error rate over a vector-space baseline. We also show that the graph walk similarity between texts has complementary value as a feature for recognizing textual entailment, improving on a competitive baseline system.
%@ 978-1-932432-54-1
@comment{
  Workshop paper "Random walks for text semantic similarity" by Ramage,
  Rafferty and Manning (2009), in the TextGraphs-4 series.
  The address field holds the publisher's city; the location field is
  presumably the workshop venue (NOTE(review): confirm before relying on it).
  added-at, biburl, description, interhash, intrahash and timestamp look like
  bookkeeping fields of the exporting service; standard styles ignore
  unknown fields.
  The percent sign in the abstract is escaped so that the rest of the text
  is not swallowed as a TeX comment when a style typesets the abstract.
}
@inproceedings{Ramage:2009:RWT:1708124.1708131,
  abstract    = {Many tasks in NLP stand to benefit from robust measures of semantic similarity for units above the level of individual words. Rich semantic resources such as WordNet provide local semantic information at the lexical level. However, effectively combining this information to compute scores for phrases or sentences is an open problem. Our algorithm aggregates local relatedness information via a random walk over a graph constructed from an underlying lexical resource. The stationary distribution of the graph walk forms a ``semantic signature'' that can be compared to another such distribution to get a relatedness score for texts. On a paraphrase recognition task, the algorithm achieves an 18.5\% relative reduction in error rate over a vector-space baseline. We also show that the graph walk similarity between texts has complementary value as a feature for recognizing textual entailment, improving on a competitive baseline system.},
  acmid       = {1708131},
  added-at    = {2011-06-20T12:21:39.000+0200},
  address     = {Stroudsburg, PA, USA},
  author      = {Ramage, Daniel and Rafferty, Anna N. and Manning, Christopher D.},
  biburl      = {https://www.bibsonomy.org/bibtex/29db733de78c6e28b51cc2556f9dabb69/jennymac},
  booktitle   = {Proceedings of the 2009 Workshop on Graph-based Methods for Natural Language Processing},
  description = {Random walks for text semantic similarity},
  interhash   = {bc34ccf5337a2d0ecf63f7752f961b6a},
  intrahash   = {9db733de78c6e28b51cc2556f9dabb69},
  isbn        = {978-1-932432-54-1},
  keywords    = {NLP semantic_signature textual_entailment},
  location    = {Suntec, Singapore},
  numpages    = {9},
  pages       = {23--31},
  publisher   = {Association for Computational Linguistics},
  series      = {TextGraphs-4},
  timestamp   = {2011-06-20T12:21:39.000+0200},
  title       = {Random walks for text semantic similarity},
  url         = {http://portal.acm.org/citation.cfm?id=1708124.1708131},
  year        = {2009},
}