We propose two novel model architectures for computing continuous vector
representations of words from very large data sets. The quality of these
representations is measured in a word similarity task, and the results are
compared to those of the previously best-performing techniques based on
different types of neural networks. We observe large improvements in accuracy
at much lower computational cost: it takes less than a day to learn
high-quality word vectors from a 1.6-billion-word data set. Furthermore, we
show that these vectors provide state-of-the-art performance on our test set
for measuring syntactic and semantic word similarities.
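The two architectures proposed in the paper are the continuous bag-of-words
(CBOW) and skip-gram models, later popularized as word2vec. As a minimal
sketch of the similarity and analogy queries the abstract refers to, the
snippet below uses the gensim library's Word2Vec reimplementation (gensim
>= 4.0), not the paper's original code; the toy corpus, hyperparameter
values, and query words are illustrative placeholders only.

# Minimal sketch, assuming gensim >= 4.0; the corpus and hyperparameters
# below are illustrative placeholders, not the paper's 1.6-billion-word setup.
from gensim.models import Word2Vec

# A tiny tokenized corpus (hypothetical).
sentences = [
    ["the", "king", "rules", "his", "kingdom"],
    ["the", "queen", "rules", "her", "kingdom"],
    ["a", "man", "walks", "with", "a", "boy"],
    ["a", "woman", "walks", "with", "a", "girl"],
]

# sg=1 selects the skip-gram architecture; sg=0 selects CBOW.
# These are the two model architectures proposed in the paper.
model = Word2Vec(sentences, vector_size=32, window=2, min_count=1,
                 sg=1, epochs=100, seed=1)

# Word similarity: cosine similarity between two learned vectors.
print(model.wv.similarity("king", "queen"))

# Analogy query of the kind the paper's test set measures:
# vector("king") - vector("man") + vector("woman") should land near vector("queen").
print(model.wv.most_similar(positive=["king", "woman"], negative=["man"], topn=3))

On a corpus this small the outputs are not meaningful; the accuracy claims in
the abstract refer to models trained on roughly 1.6 billion words.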
@misc{mikolov2013efficient,
  author        = {Mikolov, Tomas and Chen, Kai and Corrado, Greg and Dean, Jeffrey},
  title         = {Efficient Estimation of Word Representations in Vector Space},
  year          = {2013},
  eprint        = {1301.3781},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1301.3781}
}