Documenting large webtext corpora: A case study on the Colossal Clean Crawled Corpus
Authors
Venue
EMNLP
BibTeX
Local Entry
@inproceedings{dodge2021,
  author    = {Dodge, Jesse and Sap, Maarten and Marasovi{\'c}, Ana and Agnew, William and Ilharco, Gabriel and Groeneveld, Dirk and Mitchell, Margaret and Gardner, Matt},
  title     = {Documenting Large Webtext Corpora: A Case Study on the {Colossal Clean Crawled Corpus}},
  booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing},
  year      = {2021},
  doi       = {10.18653/v1/2021.emnlp-main.98},
}
From AUTO:OPENALEX
@inproceedings{dodge2021,
  author    = {Dodge, Jesse and Sap, Maarten and Marasovi{\'c}, Ana and Agnew, William S. and Ilharco, Gabriel and Groeneveld, Dirk and Mitchell, Margaret and Gardner, Matt},
  title     = {Documenting Large Webtext Corpora: A Case Study on the {Colossal Clean Crawled Corpus}},
  booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing},
  year      = {2021},
  doi       = {10.18653/v1/2021.emnlp-main.98},
}