Mining frequent patterns is plagued by the problem of pattern explosion, making pattern reduction techniques a key challenge in pattern mining. In this article we propose a novel theoretical framework for pattern reduction by measuring the robustness of a property of an itemset such as closedness or nonderivability. The robustness of a property is the probability that this property holds on random subsets of the original data. We study four properties, namely an itemset being closed, free, non-derivable, or totally shattered, and demonstrate how to compute the robustness analytically without actually sampling the data. Our concept of robustness has many advantages: Unlike statistical approaches for reducing patterns, we do not assume a null hypothesis or any noise model and, in contrast to noise-tolerant or approximate patterns, the robust patterns for a given property are always a subset of the patterns with this property. If the underlying property is monotonic then the measure is also monotonic, allowing us to efficiently mine robust itemsets. We further derive a parameter-free technique for ranking itemsets that can be used for top-k approaches. Our experiments demonstrate that we can successfully use the robustness measure to reduce the number of patterns and that ranking yields interesting itemsets.
%0 Journal Article
%1 Tatti:2014:FRI:2676651.2656261
%A Tatti, Nikolaj
%A Moerchen, Fabian
%A Calders, Toon
%C New York, NY, USA
%D 2014
%I ACM
%J ACM Trans. Database Syst.
%K fca
%N 3
%P 20:1--20:27
%R 10.1145/2656261
%T Finding Robust Itemsets Under Subsampling
%U http://doi.acm.org/10.1145/2656261
%V 39
%X Mining frequent patterns is plagued by the problem of pattern explosion, making pattern reduction techniques a key challenge in pattern mining. In this article we propose a novel theoretical framework for pattern reduction by measuring the robustness of a property of an itemset such as closedness or nonderivability. The robustness of a property is the probability that this property holds on random subsets of the original data. We study four properties, namely an itemset being closed, free, non-derivable, or totally shattered, and demonstrate how to compute the robustness analytically without actually sampling the data. Our concept of robustness has many advantages: Unlike statistical approaches for reducing patterns, we do not assume a null hypothesis or any noise model and, in contrast to noise-tolerant or approximate patterns, the robust patterns for a given property are always a subset of the patterns with this property. If the underlying property is monotonic then the measure is also monotonic, allowing us to efficiently mine robust itemsets. We further derive a parameter-free technique for ranking itemsets that can be used for top-k approaches. Our experiments demonstrate that we can successfully use the robustness measure to reduce the number of patterns and that ranking yields interesting itemsets.
@article{Tatti:2014:FRI:2676651.2656261,
abstract = {Mining frequent patterns is plagued by the problem of pattern explosion, making pattern reduction techniques a key challenge in pattern mining. In this article we propose a novel theoretical framework for pattern reduction by measuring the robustness of a property of an itemset such as closedness or nonderivability. The robustness of a property is the probability that this property holds on random subsets of the original data. We study four properties, namely an itemset being closed, free, non-derivable, or totally shattered, and demonstrate how to compute the robustness analytically without actually sampling the data. Our concept of robustness has many advantages: Unlike statistical approaches for reducing patterns, we do not assume a null hypothesis or any noise model and, in contrast to noise-tolerant or approximate patterns, the robust patterns for a given property are always a subset of the patterns with this property. If the underlying property is monotonic then the measure is also monotonic, allowing us to efficiently mine robust itemsets. We further derive a parameter-free technique for ranking itemsets that can be used for top-k approaches. Our experiments demonstrate that we can successfully use the robustness measure to reduce the number of patterns and that ranking yields interesting itemsets.},
acmid = {2656261},
added-at = {2019-03-01T18:51:26.000+0100},
address = {New York, NY, USA},
articleno = {20},
author = {Tatti, Nikolaj and Moerchen, Fabian and Calders, Toon},
biburl = {https://www.bibsonomy.org/bibtex/227d554848d514d843639ccfc048f3a7e/johirth},
doi = {10.1145/2656261},
interhash = {e06c4b217723f177820c396e0e8dd38a},
intrahash = {27d554848d514d843639ccfc048f3a7e},
issn = {0362-5915},
issue_date = {September 2014},
journal = {ACM Trans. Database Syst.},
keywords = {fca},
month = oct,
number = 3,
numpages = {27},
pages = {20:1--20:27},
publisher = {ACM},
timestamp = {2019-03-01T18:51:26.000+0100},
title = {Finding Robust Itemsets Under Subsampling},
url = {http://doi.acm.org/10.1145/2656261},
volume = 39,
year = 2014
}