Scaling Laws for Neural Language Models
Authors
Jared Kaplan, Sam McCandlish, Tom Henighan, Tom B. Brown, Benjamin Chess, Rewon Child, Scott Gray, Alec Radford, Jeffrey Wu, Dario Amodei
Venue
arXiv preprint
Abstract
Establishes power-law scaling relationships between language model performance and model size, dataset size, and compute, spanning seven orders of magnitude.
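As a reading aid (not part of the bibliographic record itself): the paper fits power laws of the form sketched below, where N is non-embedding parameter count, D is dataset size in tokens, and C_min is compute in PF-days; the exponents and constants shown are the approximate fitted values reported in the paper.
L(N) = (N_c / N)^{\alpha_N}, \quad \alpha_N \approx 0.076, \quad N_c \approx 8.8 \times 10^{13}
L(D) = (D_c / D)^{\alpha_D}, \quad \alpha_D \approx 0.095, \quad D_c \approx 5.4 \times 10^{13}
L(C_{\min}) = (C_c^{\min} / C_{\min})^{\alpha_C^{\min}}, \quad \alpha_C^{\min} \approx 0.050, \quad C_c^{\min} \approx 3.1 \times 10^{8}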
Tags
Links
https://arxiv.org/abs/2001.08361
BibTeX
Local Entry
@article{kaplan2020scaling,
  title    = {Scaling Laws for Neural Language Models},
  author   = {Jared Kaplan and Sam McCandlish and Tom Henighan and Tom B. Brown and Benjamin Chess and Rewon Child and Scott Gray and Alec Radford and Jeffrey Wu and Dario Amodei},
  year     = {2020},
  journal  = {arXiv preprint},
  url      = {https://arxiv.org/abs/2001.08361},
  abstract = {Establishes power-law scaling relationships between language model performance and model size, dataset size, and compute, spanning seven orders of magnitude.}
}
From OpenAlex
@article{kaplan2020scaling,
  title   = {Scaling Laws for Neural Language Models},
  author  = {Jared Kaplan and Sam McCandlish and Tom Henighan and T. B. Brown and Benjamin Chess and Rewon Child and Scott Gray and Alec Radford and Jeffrey Wu and Dario Amodei},
  year    = {2020},
  journal = {arXiv (Cornell University)},
  doi     = {10.48550/arXiv.2001.08361}
}