Please log in to take part in the discussion (add your own reviews or comments).
Cite this publication
More citation styles
- please select -
%0 Generic
%1 rafailov2023direct
%A Rafailov, Rafael
%A Sharma, Archit
%A Mitchell, Eric
%A Ermon, Stefano
%A Manning, Christopher D.
%A Finn, Chelsea
%D 2023
%K feedback nlp
%T Direct Preference Optimization: Your Language Model is Secretly a Reward Model
% arXiv preprint entry (eprint 2305.18290, class cs.LG).
% Bibliographic fields come first; the trailing group (added-at, biburl,
% interhash, intrahash, timestamp) appears to be export bookkeeping from the
% hosting service and is ignored by standard bibliography styles.
@misc{rafailov2023direct,
  author        = {Rafailov, Rafael and Sharma, Archit and Mitchell, Eric and Ermon, Stefano and Manning, Christopher D. and Finn, Chelsea},
  title         = {Direct Preference Optimization: Your Language Model is Secretly a Reward Model},
  year          = {2023},
  eprint        = {2305.18290},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  keywords      = {feedback nlp},
  description   = {[2305.18290] Direct Preference Optimization: Your Language Model is Secretly a Reward Model},
  added-at      = {2024-04-19T12:49:40.000+0200},
  timestamp     = {2024-04-19T12:49:40.000+0200},
  biburl        = {https://www.bibsonomy.org/bibtex/20e2dfe25ee07c86585f5c1f7a5be3d92/jonas.kaiser},
  interhash     = {5de55bbe56b91e84c7e7d39ba86548ab},
  intrahash     = {0e2dfe25ee07c86585f5c1f7a5be3d92},
}