Is Model Collapse Inevitable? Breaking the Curse of Recursion by Accumulating Real and Synthetic Data
Authors
Venue
arXiv preprint
Abstract
Studies whether model collapse is inevitable. Found that collapse occurs when replacing real data with synthetic data each generation. However, when accumulating synthetic data alongside original real data, models stay stable across sizes and modalities. Suggests data accumulation rather than replacement as a solution.
Tags
Links
BibTeX
Local Entry
% Local record for arXiv preprint 2404.01413.
% Authors use the unambiguous "Last, First" form; {Breaking} is brace-protected
% so sentence-casing styles keep the capital after the question mark.
% The arXiv identifier is stored in eprint/archiveprefix as well as in the URL.
@article{gerstgrasser2024modelcollapseaccumulate,
  author        = {Gerstgrasser, Matthias and Schaeffer, Rylan and Dey, Apratim
                   and Rafailov, Rafael and Sleight, Henry and Hughes, John
                   and Korbak, Tomasz and Agrawal, Rajashree and Pai, Dhruv
                   and Gromov, Andrey and Roberts, Daniel A. and Yang, Diyi
                   and Donoho, David L. and Koyejo, Sanmi},
  title         = {Is Model Collapse Inevitable? {Breaking} the Curse of Recursion by Accumulating Real and Synthetic Data},
  journal       = {arXiv preprint},
  year          = {2024},
  eprint        = {2404.01413},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2404.01413},
  abstract      = {Studies whether model collapse is inevitable. Found that collapse occurs when replacing real data with synthetic data each generation. However, when accumulating synthetic data alongside original real data, models stay stable across sizes and modalities. Suggests data accumulation rather than replacement as a solution.},
}
From AUTO:OPENALEX
% OpenAlex-derived record for the same arXiv preprint (identifier taken from the DOI suffix).
% The journal value replaces the OpenAlex source label "arXiv (Cornell University)",
% which is a repository/host name rather than a publication venue.
% NOTE(review): this entry shares its citation key with the local entry above; keep only
% one of the two in a database passed to BibTeX/Biber, which report repeated keys.
% NOTE(review): "John D. Hughes" differs from "John Hughes" in the local entry; confirm
% against the arXiv author listing before relying on the middle initial.
@article{gerstgrasser2024modelcollapseaccumulate,
  author        = {Gerstgrasser, Matthias and Schaeffer, Rylan and Dey, Apratim
                   and Rafailov, Rafael and Sleight, Henry and Hughes, John D.
                   and Korbak, Tomasz and Agrawal, Rajashree and Pai, Dhruv
                   and Gromov, Andrey and Roberts, Daniel A. and Yang, Diyi
                   and Donoho, David L. and Koyejo, Sanmi},
  title         = {Is Model Collapse Inevitable? {Breaking} the Curse of Recursion by Accumulating Real and Synthetic Data},
  journal       = {arXiv preprint},
  year          = {2024},
  eprint        = {2404.01413},
  archiveprefix = {arXiv},
  doi           = {10.48550/arxiv.2404.01413},
}