{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,10]],"date-time":"2026-01-10T08:22:38Z","timestamp":1768033358663,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,8,29]],"date-time":"2022-08-29T00:00:00Z","timestamp":1661731200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,8,29]]},"DOI":"10.1145\/3545008.3545090","type":"proceedings-article","created":{"date-parts":[[2023,1,15]],"date-time":"2023-01-15T01:04:08Z","timestamp":1673744648000},"page":"1-11","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":9,"title":["Lobster: Load Balance-Aware I\/O for Distributed DNN Training"],"prefix":"10.1145","author":[{"given":"Jie","family":"Liu","sequence":"first","affiliation":[{"name":"University of California, Merced, United States of America"}]},{"given":"Bogdan","family":"Nicolae","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, United States of America"}]},{"given":"Dong","family":"Li","sequence":"additional","affiliation":[{"name":"University of California, Merced, United States of America"}]}],"member":"320","published-online":{"date-parts":[[2023,1,13]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Tensorflow: Large-scale machine learning on heterogeneous distributed systems. arXiv preprint arXiv:1603.04467(2016).","author":"Abadi Mart\u00edn","year":"2016","unstructured":"Mart\u00edn Abadi, Ashish Agarwal, Paul Barham, Eugene Brevdo, Zhifeng Chen, Craig Citro, Greg\u00a0S Corrado, Andy Davis, Jeffrey Dean, Matthieu Devin, 2016. Tensorflow: Large-scale machine learning on heterogeneous distributed systems. arXiv preprint arXiv:1603.04467(2016)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.14778\/3485450.3485462"},{"key":"e_1_3_2_1_3_1","volume-title":"Nvidia Data Center Processing Unit (DPU) Architecture. In 2021 IEEE Hot Chips 33 Symposium (HCS). IEEE, 1\u201320","author":"Burstein Idan","year":"2021","unstructured":"Idan Burstein. 2021. Nvidia Data Center Processing Unit (DPU) Architecture. In 2021 IEEE Hot Chips 33 Symposium (HCS). IEEE, 1\u201320."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3392717.3392771"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/1454115.1454151"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356147"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00067"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476181"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/MLHPC.2016.004"},{"key":"e_1_3_2_1_10_1","volume-title":"Deep Learning","author":"Goodfellow Ian","unstructured":"Ian Goodfellow, Yoshua Bengio, and Aaron Courville. 2016. Deep Learning. MIT Press."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"A Handa V Patraucean V Badrinarayanan S Stent and R Cipolla. 2015. SceneNet: Understanding real world indoor scenes with synthetic data. arXiv preprint arXiv:1511.07041(2015).","DOI":"10.1109\/CVPR.2016.442"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378530"},{"key":"e_1_3_2_1_14_1","volume-title":"4th Conference on Machine Learning and Systems. Virtual.","author":"Ivanov Andrei","year":"2021","unstructured":"Andrei Ivanov, Nikoli Dryden, Tal Ben-Nun, Shigang Li, and Torsten Hoefler. 2021. Data movement is all you need: A case study on optimizing transformers. In 4th Conference on Machine Learning and Systems. Virtual."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654889"},{"key":"e_1_3_2_1_16_1","volume-title":"11th {USENIX} Workshop on Hot Topics in Cloud Computing (HotCloud 19).","author":"Kakaraparthy Aarati","unstructured":"Aarati Kakaraparthy, Abhay Venkatesh, Amar Phanishayee, and Shivaram Venkataraman. 2019. The case for unifying data loading in machine learning clusters. In 11th {USENIX} Workshop on Hot Topics in Cloud Computing (HotCloud 19)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3302424.3303957"},{"key":"e_1_3_2_1_18_1","unstructured":"Alex Krizhevsky Ilya Sutskever and Geoffrey\u00a0E Hinton. 2012. Imagenet classification with deep convolutional neural networks. In Advances in neural information processing systems. 1097\u20131105."},{"key":"e_1_3_2_1_19_1","volume-title":"18th USENIX Conference on File and Storage Technologies. 283\u2013296","author":"Kumar Abhishek\u00a0Vijaya","year":"2020","unstructured":"Abhishek\u00a0Vijaya Kumar and Muthian Sivathanu. 2020. Quiver: An informed storage cache for deep learning. In 18th USENIX Conference on File and Storage Technologies. 283\u2013296."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2018.00054"},{"key":"e_1_3_2_1_21_1","volume-title":"Refurbish Your Training Data: Reusing Partially Augmented Samples for Faster Deep Neural Network Training. In 2021 USENIX Annual Technical Conference (USENIX ATC 21)","author":"Lee Gyewon","year":"2021","unstructured":"Gyewon Lee, Irene Lee, Hyeonmin Ha, Kyunggeun Lee, Hwarim Hyun, Ahnjae Shin, and Byung-Gon Chun. 2021. Refurbish Your Training Data: Reusing Partially Augmented Samples for Faster Deep Neural Network Training. In 2021 USENIX Annual Technical Conference (USENIX ATC 21). 537\u2013550."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2010.5470463"},{"key":"e_1_3_2_1_23_1","unstructured":"Shen Li Yanli Zhao Rohan Varma Omkar Salpekar Pieter Noordhuis Teng Li Adam Paszke Jeff Smith Brian Vaughan Pritam Damania 2020. Pytorch distributed: Experiences on accelerating data parallel training. arXiv preprint arXiv:2006.15704(2020)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3526058.3535453"},{"key":"e_1_3_2_1_25_1","volume-title":"The Case for Storage Optimization Decoupling in Deep Learning Frameworks. In IEEE International Conference on Cluster Computing. IEEE, 649\u2013656","author":"Macedo Ricardo","year":"2021","unstructured":"Ricardo Macedo, Cl\u00e1udia Correia, Marco Dantas, Cl\u00e1udia Brito, Weijia Xu, Yusuke Tanimura, Jason Haga, and Joao Paulo. 2021. The Case for Storage Optimization Decoupling in Deep Learning Frameworks. In IEEE International Conference on Cluster Computing. IEEE, 649\u2013656."},{"key":"e_1_3_2_1_26_1","unstructured":"Jayashree Mohan Amar Phanishayee Ashish Raniwala and Vijay Chidambaram. 2020. Analyzing and mitigating data stalls in DNN training. arXiv preprint arXiv:2007.06775(2020)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.14778\/3407790.3407816"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359646"},{"key":"e_1_3_2_1_29_1","volume-title":"1st Conference on Systems and Machine Learning (SysML), SysML, Vol.\u00a018","author":"Narayanan Deepak","year":"2018","unstructured":"Deepak Narayanan, Keshav Santhanam, and Matei Zaharia. 2018. Accelerating model search with model batching. In 1st Conference on Systems and Machine Learning (SysML), SysML, Vol.\u00a018."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00072"},{"key":"e_1_3_2_1_31_1","first-page":"8026","article-title":"Pytorch: An imperative style, high-performance deep learning library","volume":"32","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, 2019. Pytorch: An imperative style, high-performance deep learning library. Advances in Neural Information Processing Systems 32 (2019), 8026\u20138037.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_32_1","unstructured":"Daniel Povey Xiaohui Zhang and Sanjeev Khudanpur. 2014. Parallel training of deep neural networks with natural gradient and parameter averaging. arXiv preprint arXiv:1410.7455(2014)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"crossref","unstructured":"Samyam Rajbhandari Olatunji Ruwase Jeff Rasley Shaden Smith and Yuxiong He. 2021. ZeRO-Infinity: Breaking the GPU Memory Wall for Extreme Scale Deep Learning. arXiv preprint arXiv:2104.07857(2021).","DOI":"10.1145\/3458817.3476205"},{"key":"e_1_3_2_1_34_1","volume-title":"Zero-offload: Democratizing billion-scale model training. arXiv preprint arXiv:2101.06840(2021).","author":"Ren Jie","year":"2021","unstructured":"Jie Ren, Samyam Rajbhandari, Reza\u00a0Yazdani Aminabadi, Olatunji Ruwase, Shuangyan Yang, Minjia Zhang, Dong Li, and Yuxiong He. 2021. Zero-offload: Democratizing billion-scale model training. arXiv preprint arXiv:2101.06840(2021)."},{"key":"e_1_3_2_1_35_1","unstructured":"Tal Ridnik Emanuel Ben-Baruch Asaf Noy and Lihi Zelnik-Manor. 2021. Imagenet-21k pretraining for the masses. arXiv preprint arXiv:2104.10972(2021)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"e_1_3_2_1_37_1","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556(2014)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNSM.2013.052113.120382"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404397.3404472"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447818.3460365"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00716"},{"key":"e_1_3_2_1_42_1","unstructured":"Mark Zhao Niket Agarwal Aarti Basant Bugra Gedik Satadru Pan Mustafa Ozdal Rakesh Komuravelli Jerry Pan Tianshu Bao Haowei Lu 2021. Understanding and co-designing the data ingestion pipeline for industry-scale recsys training. arXiv preprint arXiv:2108.09373(2021)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/MASCOTS.2018.00023"},{"key":"e_1_3_2_1_44_1","unstructured":"Mahdi Zolnouri Xinlin Li and Vahid\u00a0Partovi Nia. 2020. Importance of data loading pipeline in training DNNs. arXiv preprint arXiv:2005.02130(2020)."}],"event":{"name":"ICPP '22: 51st International Conference on Parallel Processing","location":"Bordeaux France","acronym":"ICPP '22"},"container-title":["Proceedings of the 51st International Conference on Parallel Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3545008.3545090","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3545008.3545090","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:02:44Z","timestamp":1750186964000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3545008.3545090"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,29]]},"references-count":44,"alternative-id":["10.1145\/3545008.3545090","10.1145\/3545008"],"URL":"https:\/\/doi.org\/10.1145\/3545008.3545090","relation":{},"subject":[],"published":{"date-parts":[[2022,8,29]]},"assertion":[{"value":"2023-01-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}