{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T22:23:25Z","timestamp":1766269405839,"version":"3.37.3"},"reference-count":54,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,2,1]],"date-time":"2023-02-01T00:00:00Z","timestamp":1675209600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,2,1]],"date-time":"2023-02-01T00:00:00Z","timestamp":1675209600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100006190","name":"Research and Development","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006190","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,2]]},"DOI":"10.1109\/hpca56546.2023.10070964","type":"proceedings-article","created":{"date-parts":[[2023,3,24]],"date-time":"2023-03-24T17:42:55Z","timestamp":1679679775000},"page":"220-232","source":"Crossref","is-referenced-by-count":14,"title":["iCache: An Importance-Sampling-Informed Cache for Accelerating I\/O-Bound DNN Model Training"],"prefix":"10.1109","author":[{"given":"Weijian","family":"Chen","sequence":"first","affiliation":[{"name":"Zhejiang University"}]},{"given":"Shuibing","family":"He","sequence":"additional","affiliation":[{"name":"Zhejiang University"}]},{"given":"Yaowen","family":"Xu","sequence":"additional","affiliation":[{"name":"Zhejiang 
University"}]},{"given":"Xuechen","family":"Zhang","sequence":"additional","affiliation":[{"name":"Washington State University Vancouver"}]},{"given":"Siling","family":"Yang","sequence":"additional","affiliation":[{"name":"Zhejiang University"}]},{"given":"Shuang","family":"Hu","sequence":"additional","affiliation":[{"name":"Zhejiang University"}]},{"given":"Xian-He","family":"Sun","sequence":"additional","affiliation":[{"name":"Illinois Institute of Technology"}]},{"given":"Gang","family":"Chen","sequence":"additional","affiliation":[{"name":"Zhejiang University"}]}],"member":"263","reference":[{"year":"2021","key":"ref1","article-title":"Orange File System"},{"key":"ref2","first-page":"265","article-title":"Tensorflow: A System for Large-Scale Machine Learning","volume-title":"Proceedings of the 12th USENIX Symposium on Operating Systems Design and Implementation","author":"Abadi"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/BigData47090.2019.9005703"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2022.3193867"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/Cluster48925.2021.00019"},{"journal-title":"Faster Neural Network Training With Data Echoing","year":"2019","author":"Choi","key":"ref6"},{"key":"ref7","article-title":"Ubershuffle: Communication-Efficient Data Shuffling for SGD via Coding Theory","author":"Chung","year":"2017","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507743"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/Cluster48925.2021.00097"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref11","article-title":"Deep learning: Adaptive Computation and Machine Learning Series","author":"Goodfellow","year":"2017","journal-title":"Cambridge Massachusetts"},{"year":"2018","key":"ref12","article-title":"Open images 
dataset"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"journal-title":"Mobilenets: Efficient Convolutional Neural Networks for Mobile Vision Applications","year":"2017","author":"Howard","key":"ref14"},{"journal-title":"Densenet: Implementing Efficient Convnet Descriptor Pyramids","year":"2014","author":"Iandola","key":"ref15"},{"journal-title":"SqueezeNet: AlexNet-Level Accuracy with 50x Fewer Parameters and <0.5 MB Model Size","year":"2016","author":"Iandola","key":"ref16"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3361525.3361538"},{"journal-title":"Accelerating Deep Learning by Focusing on The Biggest Losers","year":"2019","author":"Jiang","key":"ref18"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/511399.511340"},{"key":"ref20","article-title":"Training Deep Models Faster with Robust, Approximate Importance Sampling","author":"Johnson","year":"2018","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref21","first-page":"283","article-title":"The Case for Unifying Data Loading in Machine Learning Clusters","volume-title":"Proceedings of the 11th USENIX Workshop on Hot Topics in Cloud Computing","author":"Kakaraparthy"},{"journal-title":"Very Deep Convolutional Networks for Large-Scale Image Recognition","year":"2014","author":"Simonyan","key":"ref22"},{"journal-title":"Biased Importance Sampling for Deep Neural Network Training","year":"2017","author":"Katharopoulos","key":"ref23"},{"key":"ref24","first-page":"2525","article-title":"Not all Samples are Created Equal: Deep Learning with Importance Sampling","volume-title":"Proceedings of the International Conference on Machine Learning","author":"Katharopoulos"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037732"},{"key":"ref26","article-title":"Learning Multiple Layers of Features From Tiny Images","volume-title":"Tech. 
Rep.","author":"Krizhevsky","year":"2009"},{"key":"ref27","first-page":"283","article-title":"Quiver: An Informed Storage Cache for Deep Learning","volume-title":"Proceedings of the 18th USENIX Conference on File and Storage Technologies","author":"Kumar"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2018.00054"},{"key":"ref29","first-page":"537","article-title":"Refurbish Your Training Data: Reusing Partially Augmented Samples for Faster Deep Neural Network Training","volume-title":"Proceedings of USENIX Annual Technical Conference","author":"Lee"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2022.3173707"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1441"},{"journal-title":"Online Batch Selection for Faster Training of Neural Networks","year":"2015","author":"Loshchilov","key":"ref32"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/Cluster48925.2021.00096"},{"key":"ref34","first-page":"248","article-title":"ARC: A Self-Tuning, Low Overhead Replacement Cache","volume-title":"Proceedings of the 2nd USENIX Conference on File and Storage Technologies","author":"Megiddo"},{"year":"2021","key":"ref35","article-title":"Analyzing and Mitigating Data Stalls in DNN Training"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.14778\/3446095.3446100"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1080\/01691864.2017.1365009"},{"journal-title":"Hoard: A Distributed Data Caching System to Accelerate Deep Learning Training on the Cloud","year":"2018","author":"Pinto","key":"ref38"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/3331526"},{"year":"2021","key":"ref40","article-title":"PyTorch\/Vision"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-54024-5_6"},{"key":"ref42","first-page":"401","article-title":"Towards Scalable Distributed Training of Deep Learning on Public Cloud Clusters","volume-title":"Proceedings of Machine Learning and 
Systems","volume":"3","author":"Shi"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00293"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1145\/3404397.3404472"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO56248.2022.00091"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1145\/155870.155881"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/PDSW54622.2021.00009"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/HiPC.2019.00037"},{"journal-title":"Autoassist: A Framework to Accelerate Training of Deep Neural Networks","year":"2019","author":"Zhang","key":"ref49"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00716"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS47924.2020.00050"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3533044"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/MASCOTS.2018.00023"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2019.8891023"}],"event":{"name":"2023 IEEE International Symposium on High-Performance Computer Architecture (HPCA)","start":{"date-parts":[[2023,2,25]]},"location":"Montreal, QC, Canada","end":{"date-parts":[[2023,3,1]]}},"container-title":["2023 IEEE International Symposium on High-Performance Computer Architecture 
(HPCA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10070856\/10070923\/10070964.pdf?arnumber=10070964","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,13]],"date-time":"2024-02-13T13:19:12Z","timestamp":1707830352000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10070964\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,2]]},"references-count":54,"URL":"https:\/\/doi.org\/10.1109\/hpca56546.2023.10070964","relation":{},"subject":[],"published":{"date-parts":[[2023,2]]}}}