Twitter is a popular micro-blogging service on the Web, where people can enter short messages, which
then become visible to some other users of the service. While the topics of these messages vary, there
are a lot of messages where the users express their opinions about some companies or their products.
These messages are a rich source of information for companies for sentiment analysis or opinion
mining. There is however a great obstacle for analyzing the messages directly: as the company names
are often ambiguous (e.g. apple, the fruit vs. Apple Inc.), one first needs to identify which messages
are related to the company. In this paper we address this question. We present various techniques
for classifying tweet messages containing a given keyword, whether they are related to a particular
company with that name or not. We first present simple techniques, which make use of company
profiles, which we created semi-automatically from external Web sources. Our advanced techniques
take ambiguity estimations into account and also automatically extend the company profiles from the
Twitter stream itself. We demonstrate the effectiveness of our methods through an extensive set of
experiments. Moreover, we extensively analyze the sources of errors in the classification. The analysis
not only brings further improvement, but also makes it possible to use the human input more efficiently.
%0 Journal Article
%1 lsirentity
%A LSIR, EPFL IC
%D 2012
%K classification conceptExtraction entity twitter
%N 2
%P 88-115
%T Entity-based Classification of Twitter Messages
%U http://www.tmrfindia.org/ijcsa/v9i15.pdf
%V 9
%X Twitter is a popular micro-blogging service on the Web, where people can enter short messages, which
then become visible to some other users of the service. While the topics of these messages vary, there
are a lot of messages where the users express their opinions about some companies or their products.
These messages are a rich source of information for companies for sentiment analysis or opinion
mining. There is however a great obstacle for analyzing the messages directly: as the company names
are often ambiguous (e.g. apple, the fruit vs. Apple Inc.), one first needs to identify which messages
are related to the company. In this paper we address this question. We present various techniques
for classifying tweet messages containing a given keyword, whether they are related to a particular
company with that name or not. We first present simple techniques, which make use of company
profiles, which we created semi-automatically from external Web sources. Our advanced techniques
take ambiguity estimations into account and also automatically extend the company profiles from the
Twitter stream itself. We demonstrate the effectiveness of our methods through an extensive set of
experiments. Moreover, we extensively analyze the sources of errors in the classification. The analysis
not only brings further improvement, but also makes it possible to use the human input more efficiently.
@comment{
  Entry lsirentity: journal article "Entity-based Classification of Twitter Messages".
  - author is an institutional name, so it is wrapped in a second pair of braces;
    otherwise BibTeX splits it at the comma into last name "LSIR" and first name "EPFL IC".
  - pages uses a double hyphen, the BibTeX convention for a page range.
  - NOTE(review): the journal name was inferred from the IJCSA path in the url field;
    confirm it against the publisher's page. The individual authors are not recorded
    in this entry and should be added if known.
  - abstract, added-at, biburl, interhash, intrahash, keywords and timestamp are
    carried over unchanged from the export.
}
@article{lsirentity,
  abstract = {Twitter is a popular micro-blogging service on the Web, where people can enter short messages, which
then become visible to some other users of the service. While the topics of these messages varies, there
are a lot of messages where the users express their opinions about some companies or their products.
These messages are a rich source of information for companies for sentiment analysis or opinion
mining. There is however a great obstacle for analyzing the messages directly: as the company names
are often ambiguous (e.g. apple, the fruit vs. Apple Inc.), one needs first to identify, which messages
are related to the company. In this paper we address this question. We present various techniques
for classifying tweet messages containing a given keyword, whether they are related to a particular
company with that name or not. We first present simple techniques, which make use of company
profiles, which we created semi-automatically from external Web sources. Our advanced techniques
take ambiguity estimations into account and also automatically extend the company profiles from the
twitter stream itself. We demonstrate the effectiveness of our methods through an extensive set of
experiments. Moreover, we extensively analyze the sources of errors in the classification. The analysis
not only brings further improvement, but also enables to use the human input more efficiently.},
  added-at = {2013-03-25T17:08:05.000+0100},
  author = {{LSIR, EPFL IC}},
  biburl = {https://www.bibsonomy.org/bibtex/29796980bf7429712aadabcf744c93e11/asmelash},
  interhash = {f9064a43d2a05a21cca46365864b829b},
  intrahash = {9796980bf7429712aadabcf744c93e11},
  journal = {International Journal of Computer Science and Applications},
  keywords = {classification conceptExtraction entity twitter},
  number = {2},
  pages = {88--115},
  timestamp = {2013-03-25T17:08:05.000+0100},
  title = {Entity-based Classification of Twitter Messages},
  url = {http://www.tmrfindia.org/ijcsa/v9i15.pdf},
  volume = {9},
  year = {2012},
}