The Pile: An 800GB Dataset of Diverse Text for Language Modeling
Authors
Venue
CoRR
Tags
Links
BibTeX
Local Entry
% Local record for arXiv preprint 2101.00027, stored with journal = CoRR and
% volume = abs/<arXiv id>, plus the eprint/archiveprefix pair for arXiv-aware styles.
% The braces inside the title keep "Pile" and "800GB" capitalised under styles
% that convert titles to sentence case.
@article{pile_paper,
  author        = {Gao, Leo and Biderman, Stella and Black, Sid and Golding, Laurence and Hoppe, Travis and Foster, Charles and Phang, Jason and He, Horace and Thite, Anish and Nabeshima, Noa and Presser, Shawn and Leahy, Connor},
  title         = {The {Pile}: An {800GB} Dataset of Diverse Text for Language Modeling},
  journal       = {CoRR},
  volume        = {abs/2101.00027},
  year          = {2021},
  eprint        = {2101.00027},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2101.00027},
} From OPENALEX
% Record for the same work, apparently as imported from OpenAlex (see the label above).
% NOTE(review): this entry reuses the citation key pile_paper, which BibTeX reports
% as a repeated entry, and it gives year 2020 where the local entry gives 2021.
% Reconcile the two records or keep only one before compiling this file.
% The braces inside the title keep "Pile" and "800GB" capitalised under styles
% that convert titles to sentence case.
@article{pile_paper,
  author  = {Gao, Leo and Biderman, Stella and Black, Sid and Golding, Laurence and Hoppe, Travis and Foster, Charles Wilmer and Phang, Jason and He, Horace and Thite, Anish and Nabeshima, Noa and Presser, Shawn and Leahy, Connor},
  title   = {The {Pile}: An {800GB} Dataset of Diverse Text for Language Modeling},
  journal = {arXiv (Cornell University)},
  year    = {2020},
  doi     = {10.48550/arxiv.2101.00027},
}