Direct Preference Optimization: Your Language Model is Secretly a Reward Model
Fields with differences: year. Compare local vs external BibTeX below.
Authors
Tags
Links
Citations
Cited in projects (1)
BibTeX
Local Entry
% Local record: arXiv preprint, identified by its eprint number. Typed as misc
% because no journal is recorded and the article type requires one.
@misc{dpo_paper,
  author        = {Rafailov, Rafael and Sharma, Archit and Mitchell, Eric and Ermon, Stefano and Manning, Christopher D. and Finn, Chelsea},
  title         = {Direct Preference Optimization: Your Language Model is Secretly a Reward Model},
  year          = {2023},
  eprint        = {2305.18290},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2305.18290},
}
From AUTO:S2
% External record: the venue is a conference, so it is stored in booktitle of
% an inproceedings entry rather than in the journal field of an article.
@inproceedings{dpo_paper,
  author    = {Rafailov, Rafael and Sharma, Archit and Mitchell, Eric and Ermon, Stefano and Manning, Christopher D. and Finn, Chelsea},
  title     = {Direct Preference Optimization: Your Language Model is Secretly a Reward Model},
  booktitle = {Neural Information Processing Systems},
  year      = {2023},
}