AI models collapse when trained on recursively generated data
Authors
Venue
Nature
Abstract
Landmark study showing that indiscriminate use of model-generated content in training causes irreversible defects in resulting models, where tails of original content distribution disappear. Model collapse is a degenerative learning process where models forget improbable events over time. Demonstrates this across LLMs, VAEs, and GMMs.
Tags
Links
BibTeX
Local Entry
@article{shumailov2024modelcollapse,
  title    = {{AI} models collapse when trained on recursively generated data},
  author   = {Shumailov, Ilia and Shumaylov, Zakhar and Zhao, Yiren and Papernot, Nicolas and Anderson, Ross and Gal, Yarin},
  journal  = {Nature},
  year     = {2024},
  doi      = {10.1038/s41586-024-07566-y},
  url      = {https://www.nature.com/articles/s41586-024-07566-y},
  abstract = {Landmark study showing that indiscriminate use of model-generated content in training causes irreversible defects in resulting models, where tails of original content distribution disappear. Model collapse is a degenerative learning process where models forget improbable events over time. Demonstrates this across LLMs, VAEs, and GMMs.},
}
@comment{From AUTO:OPENALEX}
@article{shumailov2024modelcollapse-openalex,
  title    = {{AI} models collapse when trained on recursively generated data},
  author   = {Shumailov, Ilia and Shumaylov, Zakhar and Zhao, Yiren and Papernot, Nicolas and Anderson, Ross and Gal, Yarin},
  journal  = {Nature},
  year     = {2024},
  doi      = {10.1038/s41586-024-07566-y},
}