The hierarchical RSS-DSS algorithm is introduced for
dynamically filtering large datasets based on the
concepts of training pattern age and difficulty, while
using a data structure to facilitate the efficient use
of memory hierarchies. Such a scheme provides the basis
for training genetic programming (GP) on a data set of
half a million patterns in 15 min. The method is
generic, thus, not specific to a particular GP
structure, computing platform, or application context.
The method is demonstrated on the real-world KDD-99
intrusion detection data set, resulting in solutions
competitive with those identified in the original
KDD-99 competition, while only using a fraction of the
original features. Parameters of the RSS-DSS algorithm
are demonstrated to be effective over a wide range of
values. An analysis of different cost functions
indicates that hierarchical fitness functions provide
the most effective solutions.
INSPEC Accession Number: 8458415
Quest Software Inc., Halifax, NS, Canada
p226 only uses "8 of 41 features". Linear GP, L-GP.
p227 "No benefits were observed in making such a
mutation operator field specific" p235 "not all
training set patterns are equally significant"
%0 Journal Article
%1 song:2005:TEC
%A Song, Dong
%A Heywood, Malcolm I.
%A Zincir-Heywood, A. Nur
%D 2005
%J IEEE Transactions on Evolutionary Computation
%K (DSS), (artificial KDD-99 RSS-DSS algorithm, algorithms, anomaly cost data data, dataset detection detection, dynamical filtering, fitness function, functions, genetic hierarchical intelligence), intrusion large learning mining, of programming programming, real-world security selection set,Dynamic sets subset training,
%N 3
%P 225--239
%R doi:10.1109/TEVC.2004.841683
%T Training genetic programming on half a million
patterns: an example from anomaly detection
%V 9
%X The hierarchical RSS-DSS algorithm is introduced for
dynamically filtering large datasets based on the
concepts of training pattern age and difficulty, while
using a data structure to facilitate the efficient use
of memory hierarchies. Such a scheme provides the basis
for training genetic programming (GP) on a data set of
half a million patterns in 15 min. The method is
generic, thus, not specific to a particular GP
structure, computing platform, or application context.
The method is demonstrated on the real-world KDD-99
intrusion detection data set, resulting in solutions
competitive with those identified in the original
KDD-99 competition, while only using a fraction of the
original features. Parameters of the RSS-DSS algorithm
are demonstrated to be effective over a wide range of
values. An analysis of different cost functions
indicates that hierarchical fitness functions provide
the most effective solutions.
@comment{song:2005:TEC -- journal article in IEEE Transactions on Evolutionary Computation 9(3), 2005. The doi field holds the bare DOI (no "doi:" prefix) and month uses the standard three-letter macro so that styles can format both. The notes and size fields are non-standard annotations ignored by standard styles.}
@article{song:2005:TEC,
abstract = {The hierarchical RSS-DSS algorithm is introduced for
dynamically filtering large datasets based on the
concepts of training pattern age and difficulty, while
using a data structure to facilitate the efficient use
of memory hierarchies. Such a scheme provides the basis
for training genetic programming (GP) on a data set of
half a million patterns in 15 min. The method is
generic, thus, not specific to a particular GP
structure, computing platform, or application context.
The method is demonstrated on the real-world KDD-99
intrusion detection data set, resulting in solutions
competitive with those identified in the original
KDD-99 competition, while only using a fraction of the
original features. Parameters of the RSS-DSS algorithm
are demonstrated to be effective over a wide range of
values. An analysis of different cost functions
indicates that hierarchical fitness functions provide
the most effective solutions.},
added-at = {2008-06-19T17:35:00.000+0200},
author = {Song, Dong and Heywood, Malcolm I. and Zincir-Heywood, A. Nur},
biburl = {https://www.bibsonomy.org/bibtex/272cf7ee5f7d66e96cb347faabbc8f1fd/brazovayeye},
doi = {10.1109/TEVC.2004.841683},
interhash = {ea2a6186317c1d8ffd37d55ac1bac3cd},
intrahash = {72cf7ee5f7d66e96cb347faabbc8f1fd},
issn = {1089-778X},
journal = {IEEE Transactions on Evolutionary Computation},
keywords = {(DSS), (artificial KDD-99 RSS-DSS algorithm, algorithms, anomaly cost data data, dataset detection detection, dynamical filtering, fitness function, functions, genetic hierarchical intelligence), intrusion large learning mining, of programming programming, real-world security selection set,Dynamic sets subset training,},
month = jun,
notes = {INSPEC Accession Number: 8458415
Quest Software Inc., Halifax, NS, Canada
p226 only uses {"}8 of 41 features{"}. Linear GP, L-GP.
p227 {"}No benefits were observed in making such a
mutation operator field specific{"} p235 {"}not all
training set patterns are equally significant{"}},
number = 3,
pages = {225--239},
size = {15 pages},
timestamp = {2008-06-19T17:51:57.000+0200},
title = {Training genetic programming on half a million
patterns: an example from anomaly detection},
volume = 9,
year = 2005
}