Please log in to take part in the discussion (add your own reviews or comments).
Cite this publication
More citation styles
- please select -
%0 Generic
%1 jacobs2023deepspeedulyssesoptimizationsenabling
%A Jacobs, Sam Ade
%A Tanaka, Masahiro
%A Zhang, Chengming
%A Zhang, Minjia
%A Song, Shuaiwen Leon
%A Rajbhandari, Samyam
%A He, Yuxiong
%D 2023
%K SequenceParallel llm
%T DeepSpeed Ulysses: System Optimizations for Enabling Training of Extreme Long Sequence Transformer Models
%U https://arxiv.org/abs/2309.14509
@comment{
  arXiv preprint entry exported from BibSonomy.
  Bibliographic fields come first, followed by the arXiv identifiers
  (eprint / archiveprefix / primaryclass) and the URL.
  The trailing fields (keywords, description, added-at, timestamp, biburl,
  interhash, intrahash) are BibSonomy export metadata; they are not standard
  BibTeX fields and are ignored by styles that do not know them.
  In the title, DeepSpeed and Ulysses are brace-protected so that styles which
  apply sentence casing keep their capitalisation.
}
@misc{jacobs2023deepspeedulyssesoptimizationsenabling,
  author        = {Jacobs, Sam Ade and Tanaka, Masahiro and Zhang, Chengming and Zhang, Minjia and Song, Shuaiwen Leon and Rajbhandari, Samyam and He, Yuxiong},
  title         = {{DeepSpeed} {Ulysses}: System Optimizations for Enabling Training of Extreme Long Sequence Transformer Models},
  year          = {2023},
  eprint        = {2309.14509},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  url           = {https://arxiv.org/abs/2309.14509},
  keywords      = {SequenceParallel llm},
  description   = {DeepSpeed Ulysses: System Optimizations for Enabling Training of Extreme Long Sequence Transformer Models},
  added-at      = {2024-09-30T23:35:36.000+0200},
  timestamp     = {2024-09-30T23:35:36.000+0200},
  biburl        = {https://www.bibsonomy.org/bibtex/2689096b49fbfd4bab76802b6344c9e66/farshaad10410},
  interhash     = {f845a76a356ab7308fd812ba44238479},
  intrahash     = {689096b49fbfd4bab76802b6344c9e66},
}