Text categorization is a fundamental task in document
processing, allowing the automated handling
of enormous streams of documents in
electronic form. One difficulty in handling some
classes of documents is the presence of different
kinds of textual errors, such as spelling and
grammatical errors in email, and character recognition
errors in documents that come through
OCR. Text categorization must work reliably on
all input, and thus must tolerate some level of
these kinds of problems.
We...
%0 Conference Proceedings
%1 citeulike:1430
%A Cavnar, William B.
%A Trenkle, John M.
%B Proceedings of SDAIR-94, 3rd Annual Symposium on Document Analysis and Information Retrieval
%C Las Vegas, US
%D 1994
%K classification
%P 161--175
%T N-Gram-Based Text Categorization
%U http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.53.9367
%X Text categorization is a fundamental task in document
processing, allowing the automated handling
of enormous streams of documents in
electronic form. One difficulty in handling some
classes of documents is the presence of different
kinds of textual errors, such as spelling and
grammatical errors in email, and character recognition
errors in documents that come through
OCR. Text categorization must work reliably on
all input, and thus must tolerate some level of
these kinds of problems.
We...
@comment{Cavnar and Trenkle 1994: a single paper (author, pages, booktitle) inside the SDAIR-94 proceedings, hence typed as inproceedings rather than proceedings. The abstract is truncated in the source export and ends in an ellipsis.}
@inproceedings{citeulike:1430,
  author               = {Cavnar, William B. and Trenkle, John M.},
  title                = {N-Gram-Based Text Categorization},
  booktitle            = {Proceedings of SDAIR-94, 3rd Annual Symposium on Document Analysis and Information Retrieval},
  address              = {Las Vegas, US},
  pages                = {161--175},
  year                 = {1994},
  url                  = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.53.9367},
  keywords             = {classification},
  abstract             = {Text categorization is a fundamental task in document
processing, allowing the automated handling
of enormous streams of documents in
electronic form. One difficulty in handling some
classes of documents is the presence of different
kinds of textual errors, such as spelling and
grammatical errors in email, and character recognition
errors in documents that come through
OCR. Text categorization must work reliably on
all input, and thus must tolerate some level of
these kinds of problems.
We...},
  added-at             = {2010-12-17T18:47:41.000+0100},
  biburl               = {https://www.bibsonomy.org/bibtex/21f91e112b870dfaad65c70a24a90f355/mortimer_m8},
  citeulike-article-id = {1430},
  citeulike-linkout-0  = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.53.9367},
  interhash            = {f0473fcb06a7b07f51bbfdc71b4b063c},
  intrahash            = {1f91e112b870dfaad65c70a24a90f355},
  posted-at            = {2005-05-05 11:14:18},
  priority             = {3},
  timestamp            = {2010-12-20T11:11:25.000+0100},
}