The word2vec model has been previously shown to be successful in creating numerical representations of words (word embeddings) that capture the semantic and syntactic meanings of words. This study examines the issue of model stability in terms of how consistent these representations are given a specific corpus and set of model parameters. Specifically, the study considers the impact of word embedding dimension size and frequency of words on stability. Stability is measured by comparing the neighborhood of words in the word vector space model. Our results demonstrate that the dimension size of word embeddings has a significant effect on the consistency of the model. In addition, the effect of the frequency of the target words on stability is identified. An approach to mitigate the effects of word frequency on stability is proposed.
Chugh et al - Stability of Word Embeddings Using Word2Vec.pdf:C\:\\Users\\Admin\\Documents\\Research\\_Paperbase\\Word Embeddings\\Chugh et al - Stability of Word Embeddings Using Word2Vec.pdf:application/pdf
%0 Book Section
%1 mitrovic_stability_2018
%A Chugh, Mansi
%A Whigham, Peter A.
%A Dick, Grant
%B AI 2018: Advances in Artificial Intelligence
%C Cham
%D 2018
%E Mitrovic, Tanja
%E Xue, Bing
%E Li, Xiaodong
%I Springer International Publishing
%K Embedding_Variability Word_Embeddings
%P 812--818
%R 10.1007/978-3-030-03991-2_73
%T Stability of Word Embeddings Using Word2Vec
%U http://link.springer.com/10.1007/978-3-030-03991-2_73
%V 11320
%X The word2vec model has been previously shown to be successful in creating numerical representations of words (word embeddings) that capture the semantic and syntactic meanings of words. This study examines the issue of model stability in terms of how consistent these representations are given a specific corpus and set of model parameters. Specifically, the study considers the impact of word embedding dimension size and frequency of words on stability. Stability is measured by comparing the neighborhood of words in the word vector space model. Our results demonstrate that the dimension size of word embeddings has a significant effect on the consistency of the model. In addition, the effect of the frequency of the target words on stability is identified. An approach to mitigate the effects of word frequency on stability is proposed.
%@ 978-3-030-03990-5 978-3-030-03991-2
% Chugh, Whigham and Dick (2018): chapter in the edited volume "AI 2018: Advances in Artificial Intelligence".
% The citation key is kept exactly as exported so that existing \cite commands keep resolving.
% Only case-sensitive tokens (AI, Word2Vec) are brace-protected; all other casing is left to the bibliography style.
@incollection{mitrovic_stability_2018,
  author     = {Chugh, Mansi and Whigham, Peter A. and Dick, Grant},
  title      = {Stability of Word Embeddings Using {Word2Vec}},
  booktitle  = {{AI} 2018: Advances in Artificial Intelligence},
  editor     = {Mitrovic, Tanja and Xue, Bing and Li, Xiaodong},
  series     = {Lecture Notes in Computer Science},
  volume     = {11320},
  pages      = {812--818},
  publisher  = {Springer International Publishing},
  address    = {Cham},
  year       = {2018},
  isbn       = {978-3-030-03990-5 978-3-030-03991-2},
  doi        = {10.1007/978-3-030-03991-2_73},
  url        = {http://link.springer.com/10.1007/978-3-030-03991-2_73},
  urldate    = {2019-12-11},
  language   = {en},
  keywords   = {Embedding_Variability Word_Embeddings},
  abstract   = {The word2vec model has been previously shown to be successful in creating numerical representations of words (word embeddings) that capture the semantic and syntactic meanings of words. This study examines the issue of model stability in terms of how consistent these representations are given a specific corpus and set of model parameters. Specifically, the study considers the impact of word embedding dimension size and frequency of words on stability. Stability is measured by comparing the neighborhood of words in the word vector space model. Our results demonstrate that the dimension size of word embeddings has a significant effect on the consistency of the model. In addition, the effect of the frequency of the target words on stability is identified. An approach to mitigate the effects of word frequency on stability is proposed.},
  file       = {Chugh et al - Stability of Word Embeddings Using Word2Vec.pdf:C\:\\Users\\Admin\\Documents\\Research\\_Paperbase\\Word Embeddings\\Chugh et al - Stability of Word Embeddings Using Word2Vec.pdf:application/pdf},
  added-at   = {2020-02-21T16:09:44.000+0100},
  timestamp  = {2020-02-21T16:09:44.000+0100},
  biburl     = {https://www.bibsonomy.org/bibtex/222b2e24a0f265bb23b38d402e9a7f5d4/tschumacher},
  interhash  = {e1791b1b87b142330b9d27cefc972ce3},
  intrahash  = {22b2e24a0f265bb23b38d402e9a7f5d4},
}