Topics in 0-1 datasets are sets of variables whose occurrences are positively connected together. Earlier, we described a simple generative topic model. In this paper we show that, given data produced by this model, the lift statistics of attributes can be described in matrix form. We use this result to obtain a simple algorithm for finding topics in 0-1 data. We also show that a problem related to the identification of topics is NP-hard. We give experimental results on the topic identification problem, both on generated and real data.
ER -
%0 Journal Article
%1 keyhere
%A Seppänen, Jouni K.
%A Bingham, Ella
%A Mannila, Heikki
%D 2003
%J Knowledge Discovery in Databases: PKDD 2003
%K plsi top-k
%P 423--434
%T A Simple Algorithm for Topic Identification in 0–1 Data
%U http://www.springerlink.com/content/dvhpk89ecc3pb2dn
%X Topics in 0-1 datasets are sets of variables whose occurrences are positively connected together. Earlier, we described a simple generative topic model. In this paper we show that, given data produced by this model, the lift statistics of attributes can be described in matrix form. We use this result to obtain a simple algorithm for finding topics in 0-1 data. We also show that a problem related to the identification of topics is NP-hard. We give experimental results on the topic identification problem, both on generated and real data.
% Paper published in the proceedings volume "Knowledge Discovery in Databases:
% PKDD 2003" (a SpringerLink book chapter per the description field), so it is
% typed as inproceedings with a booktitle rather than as a journal article.
% Accents and the en dash use LaTeX escapes so the entry works under both
% classic BibTeX and Biber.
@inproceedings{keyhere,
  author      = {Sepp{\"a}nen, Jouni K. and Bingham, Ella and Mannila, Heikki},
  title       = {A Simple Algorithm for Topic Identification in 0--1 Data},
  booktitle   = {Knowledge Discovery in Databases: PKDD 2003},
  pages       = {423--434},
  year        = {2003},
  url         = {http://www.springerlink.com/content/dvhpk89ecc3pb2dn},
  keywords    = {plsi top-k},
  abstract    = {Topics in 0-1 datasets are sets of variables whose occurrences are positively connected together. Earlier, we described a simple generative topic model. In this paper we show that, given data produced by this model, the lift statistics of attributes can be described in matrix form. We use this result to obtain a simple algorithm for finding topics in 0-1 data. We also show that a problem related to the identification of topics is NP-hard. We give experimental results on the topic identification problem, both on generated and real data.},
  description = {SpringerLink - Book Chapter},
  added-at    = {2009-04-24T17:01:07.000+0200},
  timestamp   = {2009-04-24T17:01:07.000+0200},
  biburl      = {https://www.bibsonomy.org/bibtex/21c1e1021a18bbea07e6acce53d5b809c/claudio.lucchese},
  interhash   = {5d9fd01d58fda8727dfd280d01fcb920},
  intrahash   = {1c1e1021a18bbea07e6acce53d5b809c},
}