Bitte melden Sie sich an, um selbst Rezensionen oder Kommentare zu erstellen.
Zitieren Sie diese Publikation
Mehr Zitationsstile
- bitte auswählen -
%0 Journal Article
%1 journals/corr/abs-2312-11456
%A Xiong, Wei
%A Dong, Hanze
%A Ye, Chenlu
%A Zhong, Han
%A Jiang, Nan
%A Zhang, Tong
%D 2023
%J CoRR
%K dblp
%T Gibbs Sampling from Human Feedback: A Provable KL-constrained Framework for RLHF.
%U http://dblp.uni-trier.de/db/journals/corr/corr2312.html#abs-2312-11456
%V abs/2312.11456
% arXiv preprint listed by DBLP under CoRR; record exported from BibSonomy.
% Title: proper nouns and acronyms are brace-protected so sentence-casing
% styles keep their capitals; the style supplies the final punctuation.
% The bare DOI and arXiv identifier are given in their dedicated fields;
% the original resolver link in ee is kept for tools that read it.
@article{journals/corr/abs-2312-11456,
  author     = {Xiong, Wei and Dong, Hanze and Ye, Chenlu and Zhong, Han and Jiang, Nan and Zhang, Tong},
  title      = {{Gibbs} Sampling from Human Feedback: A Provable {KL}-constrained Framework for {RLHF}},
  journal    = {CoRR},
  volume     = {abs/2312.11456},
  year       = {2023},
  eprint     = {2312.11456},
  eprinttype = {arXiv},
  doi        = {10.48550/arXiv.2312.11456},
  url        = {http://dblp.uni-trier.de/db/journals/corr/corr2312.html#abs-2312-11456},
  ee         = {https://doi.org/10.48550/arXiv.2312.11456},
  keywords   = {dblp},
  added-at   = {2024-08-02T00:00:00.000+0200},
  timestamp  = {2024-08-05T07:07:52.000+0200},
  biburl     = {https://www.bibsonomy.org/bibtex/22061fcb8b404b289f5c2b74d114e9cda/dblp},
  interhash  = {27a330eecc1ea99a694832ff196d7471},
  intrahash  = {2061fcb8b404b289f5c2b74d114e9cda},
}