@jonaskaiser 4 years ago
(last updated 4 years ago)
Nicht in Ausarbeitung genutzt, da BERT als Thema gewählt wurde und zur Betrachtung alternativer Modelle nur beschränkt Platz verfügbar ist.
References
Bookmarks
deleting review
Please log in to take part in the discussion (add own reviews or comments).
Cite this publication
More citation styles
- please select -
%0 Journal Article
%1 journals/corr/abs-1909-08053
%A Shoeybi, Mohammad
%A Patwary, Mostofa
%A Puri, Raul
%A LeGresley, Patrick
%A Casper, Jared
%A Catanzaro, Bryan
%D 2019
%J CoRR
%K
%T Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism
%U http://dblp.uni-trier.de/db/journals/corr/corr1909.html#abs-1909-08053
%V abs/1909.08053
@comment{Megatron-LM preprint, arXiv:1909.08053 (identifier taken from the ee link below).
  The journal/volume pair keeps the CoRR convention of the original record for styles
  that ignore eprint data; eprint/archiveprefix expose the arXiv identifier to styles
  that support it. added-at, biburl, ee, interhash, intrahash and timestamp are
  non-standard bookkeeping fields that standard styles ignore.}
@article{journals/corr/abs-1909-08053,
  author        = {Shoeybi, Mohammad and Patwary, Mostofa and Puri, Raul and LeGresley, Patrick and Casper, Jared and Catanzaro, Bryan},
  title         = {{Megatron-LM}: Training Multi-Billion Parameter Language Models Using Model Parallelism},
  journal       = {CoRR},
  volume        = {abs/1909.08053},
  year          = {2019},
  eprint        = {1909.08053},
  archiveprefix = {arXiv},
  url           = {http://dblp.uni-trier.de/db/journals/corr/corr1909.html#abs-1909-08053},
  ee            = {http://arxiv.org/abs/1909.08053},
  biburl        = {https://www.bibsonomy.org/bibtex/2b06a23e52f3ed17d43e4160503b7bf7b/jonaskaiser},
  interhash     = {cd4e6ad8c8e1319cbde05fe82576070b},
  intrahash     = {b06a23e52f3ed17d43e4160503b7bf7b},
  added-at      = {2020-07-14T17:15:57.000+0200},
  timestamp     = {2020-07-14T17:15:57.000+0200},
}