Please log in to take part in the discussion (add your own reviews or comments).
Cite this publication
More citation styles
- please select -
%0 Journal Article
%1 journals/corr/abs-2402-14740
%A Ahmadian, Arash
%A Cremer, Chris
%A Gallé, Matthias
%A Fadaee, Marzieh
%A Kreutzer, Julia
%A Pietquin, Olivier
%A Üstün, Ahmet
%A Hooker, Sara
%D 2024
%J CoRR
%K dblp
%T Back to Basics: Revisiting REINFORCE Style Optimization for Learning from Human Feedback in LLMs
%U http://dblp.uni-trier.de/db/journals/corr/corr2402.html#abs-2402-14740
%V abs/2402.14740
% arXiv preprint 2402.14740, as exported from the dblp CoRR listing via BibSonomy.
% The journal/volume pair follows the dblp convention; doi and eprint fields identify the preprint itself.
@article{journals/corr/abs-2402-14740,
  author        = {Ahmadian, Arash and Cremer, Chris and Gall{\'e}, Matthias and Fadaee, Marzieh and Kreutzer, Julia and Pietquin, Olivier and {\"U}st{\"u}n, Ahmet and Hooker, Sara},
  title         = {Back to Basics: Revisiting {REINFORCE} Style Optimization for Learning from Human Feedback in {LLMs}},
  journal       = {CoRR},
  volume        = {abs/2402.14740},
  year          = {2024},
  doi           = {10.48550/arXiv.2402.14740},
  eprint        = {2402.14740},
  archiveprefix = {arXiv},
  url           = {http://dblp.uni-trier.de/db/journals/corr/corr2402.html#abs-2402-14740},
  ee            = {https://doi.org/10.48550/arXiv.2402.14740},
  keywords      = {dblp},
  added-at      = {2024-03-25T00:00:00.000+0100},
  timestamp     = {2024-04-08T22:49:28.000+0200},
  biburl        = {https://www.bibsonomy.org/bibtex/210d549e8a84110e3c5079fc1cd1f02b6/dblp},
  interhash     = {7c5df59b088f20c791ee71dacc6ccedb},
  intrahash     = {10d549e8a84110e3c5079fc1cd1f02b6},
}