We present a new composite similarity metric that combines information from multiple linguistic indicators to measure semantic distance between pairs of small textual units. Several potential features are investigated and an optimal combination is selected via machine learning. We discuss a more restrictive definition of similarity than traditional, document-level and information retrieval-oriented, notions of
similarity, and motivate it by showing its relevance to the multi-document text summarization problem. Results from our system are evaluated against standard information retrieval
techniques, establishing that the new method is more effective in identifying closely related textual units.
%0 Conference Paper
%1 default
%A Hatzivassiloglou, Vasileios
%A Klavans, Judith L.
%A Eskin, Eleazar
%D 1999
%K retrieval semantic_distance similarity textual units
%P 203--212
%T Detecting Text Similarity over Short Passages: Exploring Linguistic Feature Combinations via Machine Learning
%U http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.14.7387
%X We present a new composite similarity metric that combines information from multiple linguistic indicators to measure semantic distance between pairs of small textual units. Several potential features are investigated and an optimal combination is selected via machine learning. We discuss a more restrictive definition of similarity than traditional, document-level and information retrieval-oriented, notions of
similarity, and motivate it by showing its relevance to the multi-document text summarization problem. Results from our system are evaluated against standard information retrieval
techniques, establishing that the new method is more effective in identifying closely related textual units.
@comment{
  Conference paper on a composite similarity metric for pairs of small
  textual units. Bibliographic fields come first; the trailing fields
  from added-at through timestamp are BibSonomy bookkeeping that
  bibliography styles ignore. The citation key is kept as exported so
  existing citations keep resolving.
}
@inproceedings{default,
  author    = {Hatzivassiloglou, Vasileios and Klavans, Judith L. and Eskin, Eleazar},
  title     = {Detecting Text Similarity over Short Passages: Exploring Linguistic Feature Combinations via Machine Learning},
  booktitle = {1999 Joint {SIGDAT} Conference on Empirical Methods in Natural Language Processing and Very Large Corpora},
  pages     = {203--212},
  year      = {1999},
  url       = {http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.14.7387},
  keywords  = {retrieval semantic_distance similarity textual units},
  abstract  = {We present a new composite similarity metric that combines information from multiple linguistic indicators to measure semantic distance between pairs of small textual units. Several potential features are investigated and an optimal combination is selected via machine learning. We discuss a more restrictive definition of similarity than traditional, document-level and information retrieval-oriented, notions of
similarity, and motivate it by showing its relevance to the multi-document text summarization problem. Results from our system are evaluated against standard information retrieval
techniques, establishing that the new method is more effective in identifying closely related textual units.},
  added-at  = {2011-06-17T14:17:32.000+0200},
  biburl    = {https://www.bibsonomy.org/bibtex/2c9603bb8ee5c92811d1d5b775b057a9a/jennymac},
  interhash = {b883cfa6ac449c5cab03018918517380},
  intrahash = {c9603bb8ee5c92811d1d5b775b057a9a},
  timestamp = {2011-06-17T14:17:32.000+0200},
}