This paper investigates the trade-off between the expressiveness of the pattern language and the performance of the pattern miner in structured data mining. This trade-off is investigated in the context of correlated pattern mining, which is concerned with finding the k-best patterns according to a convex criterion, for the pattern languages of itemsets, multi-itemsets, sequences, trees and graphs. The criteria used in our investigation are the typical ones in data mining: computational cost and predictive accuracy and the domain is that of mining molecular graph databases. More specifically, we provide empirical answers to the following questions: how does the expressive power of the language affect the computational cost? and what is the trade-off between expressiveness of the pattern language and the predictive accuracy of the learned model? While answering the first question, we also introduce a novel stepwise approach to correlated pattern mining in which the results of mining a simpler pattern language are employed as a starting point for mining in a more complex one. This stepwise approach typically leads to significant speed-ups (up to a factor 1000) for mining graphs.
%0 Conference Paper
%1 Bringmann:EtAl:06
%A Bringmann, Bjorn
%A Zimmermann, Albrecht
%A Raedt, Luc De
%A Nijssen, Siegfried
%D 2006
%J 10th European Conference on Principles and Practice of Knowledge Discovery in Databases
%K 2006 pkdd structure
%P 55--66
%T Don't Be Afraid of Simpler Patterns
%U http://dx.doi.org/10.1007/11871637_10
%X This paper investigates the trade-off between the expressiveness of the pattern language and the performance of the pattern miner in structured data mining. This trade-off is investigated in the context of correlated pattern mining, which is concerned with finding the k-best patterns according to a convex criterion, for the pattern languages of itemsets, multi-itemsets, sequences, trees and graphs. The criteria used in our investigation are the typical ones in data mining: computational cost and predictive accuracy and the domain is that of mining molecular graph databases. More specifically, we provide empirical answers to the following questions: how does the expressive power of the language affect the computational cost? and what is the trade-off between expressiveness of the pattern language and the predictive accuracy of the learned model? While answering the first question, we also introduce a novel stepwise approach to correlated pattern mining in which the results of mining a simpler pattern language are employed as a starting point for mining in a more complex one. This stepwise approach typically leads to significant speed-ups (up to a factor 1000) for mining graphs.
@inproceedings{Bringmann:EtAl:06,
abstract = {This paper investigates the trade-off between the expressiveness of the pattern language and the performance of the pattern miner in structured data mining. This trade-off is investigated in the context of correlated pattern mining, which is concerned with finding the k-best patterns according to a convex criterion, for the pattern languages of itemsets, multi-itemsets, sequences, trees and graphs. The criteria used in our investigation are the typical ones in data mining: computational cost and predictive accuracy and the domain is that of mining molecular graph databases. More specifically, we provide empirical answers to the following questions: how does the expressive power of the language affect the computational cost? and what is the trade-off between expressiveness of the pattern language and the predictive accuracy of the learned model? While answering the first question, we also introduce a novel stepwise approach to correlated pattern mining in which the results of mining a simpler pattern language are employed as a starting point for mining in a more complex one. This stepwise approach typically leads to significant speed-ups (up to a factor 1000) for mining graphs.},
added-at = {2007-02-14T19:56:42.000+0100},
author = {Bringmann, Bjorn and Zimmermann, Albrecht and Raedt, Luc De and Nijssen, Siegfried},
biburl = {https://www.bibsonomy.org/bibtex/27a0cfc3d63bf5babcd0fb146459e1418/seandalai},
interhash = {8d51d17db7d031df34e6f7fe5e117405},
intrahash = {7a0cfc3d63bf5babcd0fb146459e1418},
journal = {10th European Conference on Principles and Practice of Knowledge Discovery in Databases},
keywords = {2006 pkdd structure},
pages = {55--66},
timestamp = {2007-02-14T19:56:42.000+0100},
title = {Don't Be Afraid of Simpler Patterns},
url = {http://dx.doi.org/10.1007/11871637_10},
year = 2006
}