This research creates an architecture for investigating the existence of probable lexical divergences between articles, categorized as Institute for Scientific Information (ISI) and non-ISI, and consequently, if such a difference is discovered, to propose the best available classification method. Based on a collection of ISI- and non-ISI-indexed articles in the areas of business and computer science, three classification models are trained. A sensitivity analysis is applied to demonstrate the impact of words in different syntactical forms on the classification decision. The results demonstrate that the lexical domains of ISI and non-ISI articles are distinguishable by machine learning techniques. Our findings indicate that the support vector machine identifies ISI-indexed articles in both disciplines with higher precision than do the Naïve Bayesian and K-Nearest Neighbors techniques.
%0 Journal Article
%1 moohebat_identifying_2015
%A Moohebat, Mohammadreza
%A Raj, Ram Gopal
%A Kareem, Sameem Binti Abdul
%A Thorleuchter, Dirk
%D 2015
%J Journal of the Association for Information Science and Technology
%K terminologieextraktion
%N 3
%P 501--511
%R 10.1002/asi.23194
%T Identifying ISI-indexed articles by their lexical usage: A text analysis approach
%V 66
%X This research creates an architecture for investigating the existence of probable lexical divergences between articles, categorized as Institute for Scientific Information (ISI) and non-ISI, and consequently, if such a difference is discovered, to propose the best available classification method. Based on a collection of ISI- and non-ISI-indexed articles in the areas of business and computer science, three classification models are trained. A sensitivity analysis is applied to demonstrate the impact of words in different syntactical forms on the classification decision. The results demonstrate that the lexical domains of ISI and non-ISI articles are distinguishable by machine learning techniques. Our findings indicate that the support vector machine identifies ISI-indexed articles in both disciplines with higher precision than do the Naïve Bayesian and K-Nearest Neighbors techniques.
% Journal article: Moohebat, Raj, Kareem and Thorleuchter (2015), vol. 66, no. 3, pp. 501--511.
% The trailing fields (added-at, biburl, interhash, intrahash, timestamp) appear to be
% BibSonomy export metadata (see biburl); standard styles ignore unknown fields.
@article{moohebat_identifying_2015,
  author     = {Moohebat, Mohammadreza and Raj, Ram Gopal and Kareem, Sameem Binti Abdul and Thorleuchter, Dirk},
  title      = {Identifying {ISI}-indexed articles by their lexical usage: A text analysis approach},
  shorttitle = {Identifying {ISI}-indexed articles by their lexical usage},
  journal    = {Journal of the Association for Information Science and Technology},
  volume     = {66},
  number     = {3},
  pages      = {501--511},
  month      = mar,
  year       = {2015},
  doi        = {10.1002/asi.23194},
  issn       = {2330-1635},
  keywords   = {terminologieextraktion},
  abstract   = {This research creates an architecture for investigating the existence of probable lexical divergences between articles, categorized as Institute for Scientific Information (ISI) and non-ISI, and consequently, if such a difference is discovered, to propose the best available classification method. Based on a collection of ISI- and non-ISI-indexed articles in the areas of business and computer science, three classification models are trained. A sensitivity analysis is applied to demonstrate the impact of words in different syntactical forms on the classification decision. The results demonstrate that the lexical domains of ISI and non-ISI articles are distinguishable by machine learning techniques. Our findings indicate that the support vector machine identifies ISI-indexed articles in both disciplines with higher precision than do the Naïve Bayesian and K-Nearest Neighbors techniques.},
  added-at   = {2018-11-04T17:02:36.000+0100},
  biburl     = {https://www.bibsonomy.org/bibtex/26181aee33f9a85217c815243d094ab5d/lepsky},
  interhash  = {f655f2192b3d8823824b0bf37250439c},
  intrahash  = {6181aee33f9a85217c815243d094ab5d},
  timestamp  = {2018-11-04T17:02:36.000+0100},
}