One of the ultimate goals of natural language processing (NLP) systems is understanding the meaning of what is being transmitted, irrespective of the medium (e.g., written versus spoken) or the form (e.g., static documents versus dynamic dialogues). Although much work has been done in traditional language domains such as speech and static written text, little has yet been done in the newer communication domains enabled by the Internet, e.g., online chat and instant messaging. This is in part due to the fact that there are no annotated chat corpora available to the broader research community. The purpose of this research is to build a chat corpus, tagged with lexical (token part-of-speech labels), syntactic (post parse tree), and discourse (post classification) information. Such a corpus can then be used to develop more complex, statistical-based NLP applications that perform tasks such as author profiling, entity identification, and social network analysis.
Description
Lexical and Discourse Analysis of Online Chat Dialog
%0 Conference Paper
%1 1306307
%A Forsyth, Eric N.
%A Martell, Craig H.
%B ICSC '07: Proceedings of the International Conference on Semantic Computing
%C Washington, DC, USA
%D 2007
%I IEEE Computer Society
%K NLP chatlog classification post
%P 19--26
%R http://dx.doi.org/10.1109/ICSC.2007.54
%T Lexical and Discourse Analysis of Online Chat Dialog
%U http://portal.acm.org/citation.cfm?id=1306307
%X One of the ultimate goals of natural language processing (NLP) systems is understanding the meaning of what is being transmitted, irrespective of the medium (e.g., written versus spoken) or the form (e.g., static documents versus dynamic dialogues). Although much work has been done in traditional language domains such as speech and static written text, little has yet been done in the newer communication domains enabled by the Internet, e.g., online chat and instant messaging. This is in part due to the fact that there are no annotated chat corpora available to the broader research community. The purpose of this research is to build a chat corpus, tagged with lexical (token part-of-speech labels), syntactic (post parse tree), and discourse (post classification) information. Such a corpus can then be used to develop more complex, statistical-based NLP applications that perform tasks such as author profiling, entity identification, and social network analysis.
%@ 0-7695-2997-6
@inproceedings{1306307,
  abstract = {One of the ultimate goals of natural language processing (NLP) systems is understanding the meaning of what is being transmitted, irrespective of the medium (e.g., written versus spoken) or the form (e.g., static documents versus dynamic dialogues). Although much work has been done in traditional language domains such as speech and static written text, little has yet been done in the newer communication domains enabled by the Internet, e.g., online chat and instant messaging. This is in part due to the fact that there are no annotated chat corpora available to the broader research community. The purpose of this research is to build a chat corpus, tagged with lexical (token part-of-speech labels), syntactic (post parse tree), and discourse (post classification) information. Such a corpus can then be used to develop more complex, statistical-based NLP applications that perform tasks such as author profiling, entity identification, and social network analysis.},
  added-at = {2010-03-24T03:59:31.000+0100},
  address = {Washington, DC, USA},
  author = {Forsyth, Eric N. and Martell, Craig H.},
  biburl = {https://www.bibsonomy.org/bibtex/220ddf0864b4ee1f42cd98e774e562bf7/zhenzhenx},
  booktitle = {ICSC '07: Proceedings of the International Conference on Semantic Computing},
  description = {Lexical and Discourse Analysis of Online Chat Dialog},
  doi = {10.1109/ICSC.2007.54},
  interhash = {943356a0fc22afa6882ea305eb1ddf00},
  intrahash = {20ddf0864b4ee1f42cd98e774e562bf7},
  isbn = {0-7695-2997-6},
  keywords = {NLP chatlog classification post},
  pages = {19--26},
  publisher = {IEEE Computer Society},
  timestamp = {2010-06-16T11:05:42.000+0200},
  title = {Lexical and Discourse Analysis of Online Chat Dialog},
  url = {http://portal.acm.org/citation.cfm?id=1306307},
  year = 2007,
}