Scaling Laws for Neural Language Models
Fields with differences: venue. Compare the local and external BibTeX entries below.
Authors
Jared Kaplan, Sam McCandlish, Tom Henighan, Tom B. Brown, Benjamin Chess, Rewon Child, Scott Gray, Alec Radford, Jeffrey Wu, Dario Amodei
Venue
arXiv preprint (arXiv:2001.08361)
Abstract
Establishes power-law scaling relationships between language model performance and model size, dataset size, and compute, spanning seven orders of magnitude.
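For quick reference, the relationships summarized above take a simple power-law form. The lines below are a minimal LaTeX sketch of the paper's three headline fits, with symbols and approximate constants as reported in Kaplan et al. (2020): L is cross-entropy test loss, N non-embedding parameter count, D dataset size in tokens, and C_min compute (PF-days) under compute-optimal allocation.

L(N) = \left( \frac{N_c}{N} \right)^{\alpha_N}, \quad \alpha_N \approx 0.076, \; N_c \approx 8.8 \times 10^{13}

L(D) = \left( \frac{D_c}{D} \right)^{\alpha_D}, \quad \alpha_D \approx 0.095, \; D_c \approx 5.4 \times 10^{13}

L(C_{\min}) = \left( \frac{C_c^{\min}}{C_{\min}} \right)^{\alpha_C^{\min}}, \quad \alpha_C^{\min} \approx 0.050, \; C_c^{\min} \approx 3.1 \times 10^{8}

Each law holds when the other two quantities are not bottlenecks; see the paper itself for the joint N, D fit and the exact constants.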
Tags
Links
https://arxiv.org/abs/2001.08361
BibTeX
Local Entry
@article{kaplan2020scaling,
  title = {Scaling Laws for Neural Language Models},
  author = {Jared Kaplan and Sam McCandlish and Tom Henighan and Tom B. Brown and Benjamin Chess and Rewon Child and Scott Gray and Alec Radford and Jeffrey Wu and Dario Amodei},
  year = {2020},
  journal = {arXiv preprint},
  url = {https://arxiv.org/abs/2001.08361},
  abstract = {Establishes power-law scaling relationships between language model performance and model size, dataset size, and compute, spanning seven orders of magnitude.}
}
External Entry (from AUTO:S2)
@article{kaplan2020scaling,
  title = {Scaling Laws for Neural Language Models},
  author = {J. Kaplan and Sam McCandlish and T. Henighan and Tom B. Brown and Benjamin Chess and R. Child and Scott Gray and Alec Radford and Jeff Wu and Dario Amodei},
  year = {2020},
  journal = {arXiv.org}
}