G. Dupret, and B. Piwowarski. ECML/PKDD 2005 Workshop on Knowledge Discovery and Ontologies, (2005)
Abstract
We show that the singular value decomposition of a term similarity matrix induces a term taxonomy. This decomposition, used in Latent Semantic Analysis and Principal Component Analysis for text, aims at identifying “concepts” that can be used in place of the terms appearing in the documents. Unlike terms, concepts are by construction uncorrelated and hence are less sensitive to the particular vocabulary used in documents. In this work, we explore the relation between terms and concepts and show that for each term there exists a latent subspace dimension for which the term coincides with a concept. By varying the number of dimensions, terms similar but more specific than the concept can be identified, leading to a term taxonomy.
%0 Conference Paper
%1 dupret2005deducing
%A Dupret, Georges
%A Piwowarski, Benjamin
%B ECML/PKDD 2005 Workshop on Knowledge Discovery and Ontologies
%D 2005
%K ol_web2.0 taxonomy_learning methods_concepthierarchy
%T Deducing a Term Taxonomy from Term Similarities
%X We show that the singular value decomposition of a term similarity matrix induces a term taxonomy. This decomposition, used in Latent Semantic Analysis and Principal Component Analysis for text, aims at identifying “concepts” that can be used in place of the terms appearing in the documents. Unlike terms, concepts are by construction uncorrelated and hence are less sensitive to the particular vocabulary used in documents. In this work, we explore the relation between terms and concepts and show that for each term there exists a latent subspace dimension for which the term coincides with a concept. By varying the number of dimensions, terms similar but more specific than the concept can be identified, leading to a term taxonomy.
@comment{Workshop paper record as exported by BibSonomy (see biburl).
  Fields such as added-at, at, biburl, file, groups, interhash, intrahash,
  misc_id, priority, timestamp and username are not standard BibTeX fields;
  they are kept verbatim as export metadata.}
@inproceedings{dupret2005deducing,
  abstract   = {We show that the singular value decomposition of a term similarity matrix induces a term taxonomy. This decomposition, used in Latent Semantic Analysis and Principal Component Analysis for text, aims at identifying ``concepts'' that can be used in place of the terms appearing in the documents. Unlike terms, concepts are by construction uncorrelated and hence are less sensitive to the particular vocabulary used in documents. In this work, we explore the relation between terms and concepts and show that for each term there exists a latent subspace dimension for which the term coincides with a concept. By varying the number of dimensions, terms similar but more specific than the concept can be identified, leading to a term taxonomy.},
  added-at   = {2011-02-17T17:42:04.000+0100},
  at         = {2010-06-16 01:28:25},
  author     = {Dupret, Georges and Piwowarski, Benjamin},
  biburl     = {https://www.bibsonomy.org/bibtex/2b40e062cdb7e352a8a990ba9d951aa0c/dbenz},
  booktitle  = {{ECML/PKDD} 2005 Workshop on Knowledge Discovery and Ontologies},
  file       = {dupret2005deducing.pdf:dupret2005deducing.pdf:PDF},
  groups     = {public},
  interhash  = {cd3d6576cddc507e34f16b72e096178a},
  intrahash  = {b40e062cdb7e352a8a990ba9d951aa0c},
  keywords   = {ol_web2.0 taxonomy_learning methods_concepthierarchy},
  misc_id    = {7328234},
  priority   = {2},
  timestamp  = {2013-07-31T15:39:42.000+0200},
  title      = {Deducing a Term Taxonomy from Term Similarities},
  username   = {dbenz},
  year       = {2005},
}