Data Leverage References

← Back to browse

Open problems and fundamental limitations of reinforcement learning from human feedback

2023 article casper2023 Not yet verified
Authors
Stephen Casper, Xander Davies, Claudia Shi, Thomas Krendl Gilbert, Jérémy Scheurer, Javier Rando, Rachel Freedman, Tomasz Korbak, David Lindner, Pedro Freire
Venue
arXiv preprint arXiv:2307.15217

BibTeX

Local Entry
% arXiv preprint record: the identifier lives in eprint/archiveprefix instead of a journal field.
% Only the first ten authors are listed, so "and others" marks the omitted co-authors.
@misc{casper2023,
  author        = {Casper, Stephen and Davies, Xander and Shi, Claudia and
                   Gilbert, Thomas Krendl and Scheurer, J{\'e}r{\'e}my and
                   Rando, Javier and Freedman, Rachel and Korbak, Tomasz and
                   Lindner, David and Freire, Pedro and others},
  title         = {Open Problems and Fundamental Limitations of Reinforcement Learning from Human Feedback},
  year          = {2023},
  eprint        = {2307.15217},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2307.15217},
}
From AUTO:S2
% Journal record. Names use "Last, First" form and accented letters are written as
% LaTeX escapes so classic BibTeX can parse and sort them. Given names that the
% source supplied only as initials are kept as initials.
@article{casper2023,
  author  = {Casper, Stephen and Davies, Xander and Shi, Claudia and
             Gilbert, Thomas Krendl and Scheurer, J{\'e}r{\'e}my and
             Rando, Javier and Freedman, Rachel and Korbak, Tomasz and
             Lindner, David and Freire, Pedro J. and Wang, Tony and
             Marks, Samuel and S{\'e}gerie, Charbel-Rapha{\"e}l and
             Carroll, Micah and Peng, Andi and
             Christoffersen, Phillip J. K. and Damani, Mehul and
             Slocum, Stewart and Anwar, Usman and Siththaranjan, Anand and
             Nadeau, Max and Michaud, Eric J. and Pfau, J. and
             Krasheninnikov, Dmitrii and Chen, Xin and Langosco, L. and
             Hase, Peter and Biyik, Erdem and Dragan, A. and
             Krueger, David and Sadigh, Dorsa and Hadfield-Menell, Dylan},
  title   = {Open Problems and Fundamental Limitations of Reinforcement Learning from Human Feedback},
  journal = {Transactions on Machine Learning Research},
  year    = {2023},
}