We investigate the structures present in the Enron email dataset using singular value decomposition and semidiscrete decomposition. Using word frequency profiles, we show that messages fall into two distinct groups, whose extrema are characterized by short messages and rare words versus long messages and common words. It is surprising that length of message and word use pattern should be related in this way. We also investigate relationships among individuals based on their patterns of word use in email. We show that word use is correlated to function within the organization, as expected. Lastly, we show that relative changes to individuals' word usage over time can be used to identify key players in major company events.
%0 Journal Article
%1 Keila:2005:SEE:1110938.1110944
%A Keila, P. S.
%A Skillicorn, D. B.
%C Hingham, MA, USA
%D 2005
%I Kluwer Academic Publishers
%J Comput. Math. Organ. Theory
%K Analysis DataSet ENRON
%N 3
%P 183--199
%R 10.1007/s10588-005-5379-y
%T Structure in the Enron Email Dataset
%U http://dx.doi.org/10.1007/s10588-005-5379-y
%V 11
%X We investigate the structures present in the Enron email dataset using singular value decomposition and semidiscrete decomposition. Using word frequency profiles, we show that messages fall into two distinct groups, whose extrema are characterized by short messages and rare words versus long messages and common words. It is surprising that length of message and word use pattern should be related in this way. We also investigate relationships among individuals based on their patterns of word use in email. We show that word use is correlated to function within the organization, as expected. Lastly, we show that relative changes to individuals' word usage over time can be used to identify key players in major company events.
@comment{
  Journal article on structure in the Enron email dataset (Keila and
  Skillicorn, 2005). The proper noun in the title is brace-protected so
  that sentence-casing styles keep its capital letter, and the journal is
  stored under its full name so that abbreviation is left to the style.
  The fields acmid, added-at, biburl, description, interhash, intrahash,
  issue_date, keywords, numpages and timestamp are export metadata; they
  are not standard BibTeX fields and standard styles ignore them.
}
@article{Keila:2005:SEE:1110938.1110944,
  abstract    = {We investigate the structures present in the Enron email dataset using singular value decomposition and semidiscrete decomposition. Using word frequency profiles, we show that messages fall into two distinct groups, whose extrema are characterized by short messages and rare words versus long messages and common words. It is surprising that length of message and word use pattern should be related in this way. We also investigate relationships among individuals based on their patterns of word use in email. We show that word use is correlated to function within the organization, as expected. Lastly, we show that relative changes to individuals' word usage over time can be used to identify key players in major company events.},
  acmid       = {1110944},
  added-at    = {2013-07-15T10:23:12.000+0200},
  address     = {Hingham, MA, USA},
  author      = {Keila, P. S. and Skillicorn, D. B.},
  biburl      = {https://www.bibsonomy.org/bibtex/2e9fd7ffc5810e327fc0835a4a8f622ff/macek},
  description = {Structure in the Enron Email Dataset},
  doi         = {10.1007/s10588-005-5379-y},
  interhash   = {3220e42f333ce76b2df4365877c6eb0c},
  intrahash   = {e9fd7ffc5810e327fc0835a4a8f622ff},
  issn        = {1381-298X},
  issue_date  = {October 2005},
  journal     = {Computational and Mathematical Organization Theory},
  keywords    = {Analysis DataSet ENRON},
  month       = oct,
  number      = {3},
  numpages    = {17},
  pages       = {183--199},
  publisher   = {Kluwer Academic Publishers},
  timestamp   = {2013-07-15T10:23:12.000+0200},
  title       = {Structure in the {Enron} Email Dataset},
  url         = {http://dx.doi.org/10.1007/s10588-005-5379-y},
  volume      = {11},
  year        = {2005}
}