{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,24]],"date-time":"2025-09-24T09:39:54Z","timestamp":1758706794345,"version":"3.41.0"},"reference-count":4,"publisher":"Association for Computing Machinery (ACM)","issue":"7","license":[{"start":{"date-parts":[[2024,7,1]],"date-time":"2024-07-01T00:00:00Z","timestamp":1719792000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":["Commun. ACM"],"published-print":{"date-parts":[[2024,7]]},"abstract":"<jats:p>Size matters in machine learning, but will this continue to be true?<\/jats:p>","DOI":"10.1145\/3647631","type":"journal-article","created":{"date-parts":[[2024,4,12]],"date-time":"2024-04-12T21:16:19Z","timestamp":1712956579000},"page":"8-10","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Data Quality May Be All You Need"],"prefix":"10.1145","volume":"67","author":[{"given":"Chris","family":"Edwards","sequence":"first","affiliation":[{"name":"Surrey, U.K"}]}],"member":"320","published-online":{"date-parts":[[2024,7,2]]},"reference":[{"key":"e_1_3_1_2_1","unstructured":"Hoffmann J. et al. An empirical analysis of compute-optimal large language model training Proceedings of Advances in Neural Information Processing Systems 35 (NeurIPS) arXiv:2203.15556 (2022)"},{"key":"e_1_3_1_3_1","unstructured":"Villalobos P. Sevilla J. Heim L. Besiroglu T. Hobbhahn M. and Ho A. Will we run out of data? An analysis of the limits of scaling datasets in machine learning arXiv:2211.04325 (2022)"},{"key":"e_1_3_1_4_1","unstructured":"Gunasekar S. et al. Textbooks are all you need arXiv:2306.11644 (2023)"},{"key":"e_1_3_1_5_1","unstructured":"Muennighoff N. Rush A.M. Barak B. Le Scao T. Piktus A. Tazi N. Pyysalo S. Wolf T. and Raffel C. Scaling data-constrained language models Proceedings of Advances in Neural Information Processing Systems 36 (NeurIPS) arXiv:2305.16264"}],"container-title":["Communications of the ACM"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3647631","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3647631","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:03:38Z","timestamp":1750291418000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3647631"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7]]},"references-count":4,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2024,7]]}},"alternative-id":["10.1145\/3647631"],"URL":"https:\/\/doi.org\/10.1145\/3647631","relation":{},"ISSN":["0001-0782","1557-7317"],"issn-type":[{"type":"print","value":"0001-0782"},{"type":"electronic","value":"1557-7317"}],"subject":[],"published":{"date-parts":[[2024,7]]},"assertion":[{"value":"2024-07-02","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}