{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:01:45Z","timestamp":1750309305131,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":108,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,4,27]],"date-time":"2024-04-27T00:00:00Z","timestamp":1714176000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,4,27]]},"DOI":"10.1145\/3620666.3651345","type":"proceedings-article","created":{"date-parts":[[2024,4,24]],"date-time":"2024-04-24T12:08:21Z","timestamp":1713960501000},"page":"689-707","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["NDPipe: Exploiting Near-data Processing for Scalable Inference and Continuous Training in Photo Storage"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3953-7895","authenticated-orcid":false,"given":"Jungwoo","family":"Kim","sequence":"first","affiliation":[{"name":"DGIST, Daegu, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1020-9444","authenticated-orcid":false,"given":"Seonggyun","family":"Oh","sequence":"additional","affiliation":[{"name":"DGIST, Daegu, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6151-8602","authenticated-orcid":false,"given":"Jaeha","family":"Kung","sequence":"additional","affiliation":[{"name":"Korea University, Seoul, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5947-9632","authenticated-orcid":false,"given":"Yeseong","family":"Kim","sequence":"additional","affiliation":[{"name":"DGIST, Daegu, Republic of 
Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9753-2286","authenticated-orcid":false,"given":"Sungjin","family":"Lee","sequence":"additional","affiliation":[{"name":"DGIST, Daegu, Republic of Korea"}]}],"member":"320","published-online":{"date-parts":[[2024,4,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"https:\/\/blog.hootsuite.com\/facebook-statistics\/","author":"Marketers Facebook Statistics","year":"2023","unstructured":"42 Facebook Statistics Marketers Need to Know in 2023. Christina Newberry. https:\/\/blog.hootsuite.com\/facebook-statistics\/, 2023."},{"key":"e_1_3_2_1_2_1","volume-title":"https:\/\/blog.google\/products\/photos\/google-photos-500-million-new-sharing\/","author":"Photos Google","year":"2017","unstructured":"500 million people using Google Photos, and three new ways to share. Anil Sabharwal. https:\/\/blog.google\/products\/photos\/google-photos-500-million-new-sharing\/, 2017."},{"key":"e_1_3_2_1_3_1","volume-title":"TensorFlow: Large-Scale Machine Learning on Heterogeneous Systems. https:\/\/www.tensorflow.org\/","author":"Abadi Mart\u00edn","year":"2015","unstructured":"Mart\u00edn Abadi, Ashish Agarwal, Paul Barham, Eugene Brevdo, Zhifeng Chen, Craig Citro, Greg S. Corrado, Andy Davis, Jeffrey Dean, Matthieu Devin, Sanjay Ghemawat, Ian Goodfellow, Andrew Harp, Geoffrey Irving, Michael Isard, Yangqing Jia, Rafal Jozefowicz, Lukasz Kaiser, Manjunath Kudlur, Josh Levenberg, Dandelion Man\u00e9, Rajat Monga, Sherry Moore, Derek Murray, Chris Olah, Mike Schuster, Jonathon Shlens, Benoit Steiner, Ilya Sutskever, Kunal Talwar, Paul Tucker, Vincent Vanhoucke, Vijay Vasudevan, Fernanda Vi\u00e9gas, Oriol Vinyals, Pete Warden, Martin Wattenberg, Martin Wicke, Yuan Yu, and Xiaoqiang Zheng. TensorFlow: Large-Scale Machine Learning on Heterogeneous Systems. https:\/\/www.tensorflow.org\/, 2015."},{"key":"e_1_3_2_1_4_1","volume-title":"Docbert: Bert for document classification. 
arXiv preprint arXiv:1904.08398","author":"Adhikari Ashutosh","year":"2019","unstructured":"Ashutosh Adhikari, Achyudh Ram, Raphael Tang, and Jimmy Lin. Docbert: Bert for document classification. arXiv preprint arXiv:1904.08398, 2019."},{"key":"e_1_3_2_1_5_1","volume-title":"Amazon S3: Object storage built to retrieve any amount of data from anywhere. https:\/\/aws.amazon.com\/s3\/?nc1=h_ls","author":"Amazon Web Services Inc.","year":"2022","unstructured":"Amazon Web Services Inc. Amazon S3: Object storage built to retrieve any amount of data from anywhere. https:\/\/aws.amazon.com\/s3\/?nc1=h_ls, 2022."},{"key":"e_1_3_2_1_6_1","volume-title":"https:\/\/aws.amazon.com\/ec2\/","author":"Amazon Web Services Inc. Amazon EC2.","year":"2023","unstructured":"Amazon Web Services Inc. Amazon EC2. https:\/\/aws.amazon.com\/ec2\/, 2023."},{"key":"e_1_3_2_1_7_1","volume-title":"Amazon EC2 instance network bandwidth. https:\/\/docs.aws.amazon.com\/AWSEC2\/latest\/UserGuide\/ec2-instance-network-bandwidth.html","author":"Amazon Web Services Inc.","year":"2023","unstructured":"Amazon Web Services Inc. Amazon EC2 instance network bandwidth. https:\/\/docs.aws.amazon.com\/AWSEC2\/latest\/UserGuide\/ec2-instance-network-bandwidth.html, 2023."},{"key":"e_1_3_2_1_8_1","volume-title":"https:\/\/aws.amazon.com\/machine-learning\/inferentia\/","author":"Amazon Web Services Inc. AWS Inferentia.","year":"2023","unstructured":"Amazon Web Services Inc. AWS Inferentia. https:\/\/aws.amazon.com\/machine-learning\/inferentia\/, 2023."},{"key":"e_1_3_2_1_9_1","volume-title":"AWS Pricing Calculator. https:\/\/calculator.aws\/","author":"Amazon Web Services Inc.","year":"2023","unstructured":"Amazon Web Services Inc. AWS Pricing Calculator. https:\/\/calculator.aws\/, 2023."},{"key":"e_1_3_2_1_10_1","volume-title":"https:\/\/awsdocs-neuron.readthedocs-hosted.com\/en\/latest\/general\/arch\/neuron-hardware\/neuron-core-v1.html","author":"Amazon Web Services Inc. 
NeuronCore-v1 Architecture.","year":"2023","unstructured":"Amazon Web Services Inc. NeuronCore-v1 Architecture. https:\/\/awsdocs-neuron.readthedocs-hosted.com\/en\/latest\/general\/arch\/neuron-hardware\/neuron-core-v1.html, 2023."},{"key":"e_1_3_2_1_11_1","volume-title":"https:\/\/awsdocs-neuron.readthedocs-hosted.com\/en\/latest\/frameworks\/torch\/index.html","author":"Amazon Web Services Inc. PyTorch Neuron.","year":"2023","unstructured":"Amazon Web Services Inc. PyTorch Neuron. https:\/\/awsdocs-neuron.readthedocs-hosted.com\/en\/latest\/frameworks\/torch\/index.html, 2023."},{"key":"e_1_3_2_1_12_1","volume-title":"Unlimited photo storage. https:\/\/www.amazon.com\/Amazon-Photos\/b?ie=UTF8&node=13234696011","author":"Amazon Web Services Inc.","year":"2024","unstructured":"Amazon Web Services Inc. Unlimited photo storage. https:\/\/www.amazon.com\/Amazon-Photos\/b?ie=UTF8&node=13234696011, 2024."},{"key":"e_1_3_2_1_13_1","volume-title":"Wei Hu. A Convergence Analysis of Gradient Descent for Deep Linear Neural Networks. In Proceedings of the International Conference on Learning Representations","author":"Arora Sanjeev","year":"2019","unstructured":"Sanjeev Arora, Nadav Cohen, Noah Golowich, and Wei Hu. A Convergence Analysis of Gradient Descent for Deep Linear Neural Networks. In Proceedings of the International Conference on Learning Representations, 2019."},{"key":"e_1_3_2_1_14_1","first-page":"387","volume-title":"Proceedings of the USENIX Conference on File and Storage Technologies","author":"Bae Jonghyun","year":"2021","unstructured":"Jonghyun Bae, Jongsung Lee, Yunho Jin, Sam Son, Shine Kim, Hakbeom Jang, Tae Jun Ham, and Jae W. Lee. FlashNeuron: SSD-Enabled Large-Batch Training of Very Deep Neural Networks. 
In Proceedings of the USENIX Conference on File and Storage Technologies, pages 387--401, 2021."},{"key":"e_1_3_2_1_15_1","first-page":"77","volume-title":"Proceedings of International Workshop on Knowledge Discovery from Data Streams","volume":"6","author":"Baena-Garc\u0131a Manuel","year":"2006","unstructured":"Manuel Baena-Garc\u0131a, Jos\u00e9 del Campo-\u00c1vila, Ra\u00fal Fidalgo, Albert Bifet, R Gavalda, and Rafael Morales-Bueno. Early drift detection method. In Proceedings of International Workshop on Knowledge Discovery from Data Streams, volume 6, pages 77--86, 2006."},{"key":"e_1_3_2_1_16_1","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown Tom","year":"2020","unstructured":"Tom Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared D Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, Sandhini Agarwal, Ariel Herbert-Voss, Gretchen Krueger, Tom Henighan, Rewon Child, Aditya Ramesh, Daniel Ziegler, Jeffrey Wu, Clemens Winter, Chris Hesse, Mark Chen, Eric Sigler, Mateusz Litwin, Scott Gray, Benjamin Chess, Jack Clark, Christopher Berner, Sam McCandlish, Alec Radford, Ilya Sutskever, and Dario Amodei. Language models are few-shot learners. Advances in Neural Information Processing Systems, 33:1877--1901, 2020.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3195970.3196071"},{"key":"e_1_3_2_1_18_1","first-page":"29","volume-title":"Proceedings of the USENIX Conference on File and Storage Technologies","author":"Cao Wei","year":"2020","unstructured":"Wei Cao, Yang Liu, Zhushi Cheng, Ning Zheng, Wei Li, Wenjie Wu, Linqiang Ouyang, Peng Wang, Yijing Wang, Ray Kuan, et al. {POLARDB} meets computational storage: Efficiently support analytical workloads in {Cloud-Native} relational database. 
In Proceedings of the USENIX Conference on File and Storage Technologies, pages 29--41, 2020."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746312"},{"key":"e_1_3_2_1_20_1","first-page":"1274","volume-title":"Proceedings of International Conference on Machine Learning","author":"Chen Minmin","year":"2013","unstructured":"Minmin Chen, Alice Zheng, and Kilian Weinberger. Fast image tagging. In Proceedings of International Conference on Machine Learning, pages 1274--1282, 2013."},{"key":"e_1_3_2_1_21_1","volume-title":"Advances in Neural Information Processing Systems","author":"Chen Xinyang","year":"2019","unstructured":"Xinyang Chen, Sinan Wang, Bo Fu, Mingsheng Long, and Jianmin Wang. Catastrophic forgetting meets negative transfer: Batch spectral shrinkage for safe transfer learning. Advances in Neural Information Processing Systems, 2019."},{"key":"e_1_3_2_1_22_1","first-page":"17981","article-title":"Staleness-Aware Pipeline for Data Parallel DNN Training","volume":"35","author":"Chen Yangrui","year":"2022","unstructured":"Yangrui Chen, Cong Xie, Meng Ma, Juncheng Gu, Yanghua Peng, Haibin Lin, Chuan Wu, and Yibo Zhu. SAPipe: Staleness-Aware Pipeline for Data Parallel DNN Training. Advances in Neural Information Processing Systems, 35:17981--17993, 2022.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.5555\/1005332.1016789"},{"key":"e_1_3_2_1_24_1","first-page":"25","article-title":"Large scale distributed deep networks","author":"Dean Jeffrey","year":"2012","unstructured":"Jeffrey Dean, Greg Corrado, Rajat Monga, Kai Chen, Matthieu Devin, Mark Mao, Marc'aurelio Ranzato, Andrew Senior, Paul Tucker, Ke Yang, et al. Large scale distributed deep networks. 
Advances in Neural Information Processing Systems, 25, 2012.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_26_1","volume-title":"DEFLATE compressed data format specification version 1.3. https:\/\/www.w3.org\/Graphics\/PNG\/RFC-1951","author":"Deutsch L. Peter","year":"1996","unstructured":"L. Peter Deutsch. DEFLATE compressed data format specification version 1.3. https:\/\/www.w3.org\/Graphics\/PNG\/RFC-1951, 1996."},{"key":"e_1_3_2_1_27_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805, 2018."},{"key":"e_1_3_2_1_28_1","volume-title":"Proceedings of the International Conference on Learning Representations","author":"Dosovitskiy Alexey","year":"2021","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, and Neil Houlsby. An Image is Worth 16\u00d716 Words: Transformers for Image Recognition at Scale. In Proceedings of the International Conference on Learning Representations, 2021."},{"key":"e_1_3_2_1_29_1","first-page":"929","volume-title":"Proceedings of the USENIX Symposium on Networked Systems Design and Implementation","author":"Eisenman Assaf","year":"2022","unstructured":"Assaf Eisenman, Kiran Kumar Matam, Steven Ingram, Dheevatsa Mudigere, Raghuraman Krishnamoorthi, Krishnakumar Nair, Misha Smelyanskiy, and Murali Annavaram. Check-n-run: a checkpointing system for training deep learning recommendation models. 
In Proceedings of the USENIX Symposium on Networked Systems Design and Implementation, pages 929--943, 2022."},{"key":"e_1_3_2_1_30_1","first-page":"381","volume-title":"Proceedings of the USENIX Annual Technical Conference","author":"Eliad Saar","year":"2021","unstructured":"Saar Eliad, Ido Hakimi, Alon De Jagger, Mark Silberstein, and Assaf Schuster. Fine-tuning giant neural networks on commodity hardware with automatic pipeline model parallelism. In Proceedings of the USENIX Annual Technical Conference, pages 381--396, 2021."},{"key":"e_1_3_2_1_31_1","volume-title":"https:\/\/www.flickr.com\/about","author":"Flickr About","year":"2024","unstructured":"Flickr. About Flickr. https:\/\/www.flickr.com\/about, 2024."},{"key":"e_1_3_2_1_32_1","volume-title":"Fast approximate nearest neighbor search with the navigating spreading-out graph. arXiv preprint arXiv:1707.00143","author":"Fu Cong","year":"2017","unstructured":"Cong Fu, Chao Xiang, Changxu Wang, and Deng Cai. Fast approximate nearest neighbor search with the navigating spreading-out graph. arXiv preprint arXiv:1707.00143, 2017."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-28645-5_29"},{"key":"e_1_3_2_1_34_1","volume-title":"A survey on concept drift adaptation. ACM computing surveys, 46(4)","author":"Gama Joao","year":"2014","unstructured":"Joao Gama, Indrundefined Zliobaitundefined, Albert Bifet, Mykola Pechenizkiy, and Abdelhamid Bouchachia. A survey on concept drift adaptation. ACM computing surveys, 46(4), 2014."},{"key":"e_1_3_2_1_35_1","volume-title":"Cnn\/transformer-based cross-model knowledge distillation for audio classification. arXiv preprint arXiv:2203.06760","author":"Gong Yuan","year":"2022","unstructured":"Yuan Gong, Sameer Khurana, Andrew Rouditchenko, and James Glass. Cmkd: Cnn\/transformer-based cross-model knowledge distillation for audio classification. arXiv preprint arXiv:2203.06760, 2022."},{"key":"e_1_3_2_1_36_1","volume-title":"Vision AI. 
https:\/\/cloud.google.com\/vision","author":"Google Inc.","year":"2022","unstructured":"Google Inc. Vision AI. https:\/\/cloud.google.com\/vision, 2022."},{"key":"e_1_3_2_1_37_1","volume-title":"https:\/\/www.google.com\/intl\/en_uk\/photos\/about\/","author":"Google Inc. Google Photos.","year":"2024","unstructured":"Google Inc. Google Photos. https:\/\/www.google.com\/intl\/en_uk\/photos\/about\/, 2024."},{"key":"e_1_3_2_1_38_1","volume-title":"What is Object storage? https:\/\/cloud.google.com\/learn\/what-is-object-storage","author":"Google Inc.","year":"2024","unstructured":"Google Inc. What is Object storage? https:\/\/cloud.google.com\/learn\/what-is-object-storage, 2024."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i2.16235"},{"key":"e_1_3_2_1_40_1","volume-title":"Organize your Google Photos library with these two updates. https:\/\/blog.google\/products\/photos\/google-photos-organization-updates-november-2023\/","author":"Krovblit Guillermo","year":"2023","unstructured":"Guillermo Krovblit. Organize your Google Photos library with these two updates. https:\/\/blog.google\/products\/photos\/google-photos-organization-updates-november-2023\/, 2023."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00047"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00059"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_44_1","first-page":"26","article-title":"More effective distributed ml via a stale synchronous parallel parameter server","author":"Ho Qirong","year":"2013","unstructured":"Qirong Ho, James Cipar, Henggang Cui, Seunghak Lee, Jin Kyu Kim, Phillip B Gibbons, Garth A Gibson, Greg Ganger, and Eric P Xing. More effective distributed ml via a stale synchronous parallel parameter server. 
Advances in Neural Information Processing Systems, 26, 2013.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.1963.10500830"},{"key":"e_1_3_2_1_46_1","first-page":"2790","volume-title":"Proceedings of International Conference on Machine Learning","author":"Houlsby Neil","year":"2019","unstructured":"Neil Houlsby, Andrei Giurgiu, Stanislaw Jastrzebski, Bruna Morrone, Quentin De Laroussilhe, Andrea Gesmundo, Mona Attariyan, and Sylvain Gelly. Parameter-efficient transfer learning for NLP. In Proceedings of International Conference on Machine Learning, pages 2790--2799, 2019."},{"key":"e_1_3_2_1_47_1","volume-title":"Statistics, trends, and forecasts. Matic Broz. https:\/\/photutorial.com\/photos-statistics\/","author":"How","year":"2023","unstructured":"How many pictures are there (2023): Statistics, trends, and forecasts. Matic Broz. https:\/\/photutorial.com\/photos-statistics\/, 2023."},{"key":"e_1_3_2_1_48_1","volume-title":"https:\/\/bernardmarr.com\/how-much-data-do-we-create-every-day-the-mind-blowing-stats-everyone-should-read\/","author":"Data Do We Create How Much","year":"2021","unstructured":"How Much Data Do We Create Every Day? The Mind-Blowing Stats Everyone Should Read. Bernard Marr. https:\/\/bernardmarr.com\/how-much-data-do-we-create-every-day-the-mind-blowing-stats-everyone-should-read\/, 2021."},{"key":"e_1_3_2_1_49_1","volume-title":"Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685","author":"Hu Edward J","year":"2021","unstructured":"Edward J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. Lora: Low-rank adaptation of large language models. 
arXiv preprint arXiv:2106.09685, 2021."},{"key":"e_1_3_2_1_50_1","first-page":"103","article-title":"Gpipe: Efficient training of giant neural networks using pipeline parallelism","volume":"32","author":"Huang Yanping","year":"2019","unstructured":"Yanping Huang, Youlong Cheng, Ankur Bapna, Orhan Firat, Dehao Chen, Mia Chen, HyoukJoong Lee, Jiquan Ngiam, Quoc V Le, Yonghui Wu, et al. Gpipe: Efficient training of giant neural networks using pipeline parallelism. Advances in Neural Information Processing Systems, 32:103--112, 2019.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW50498.2020.00276"},{"key":"e_1_3_2_1_52_1","unstructured":"Amazon Web Services Inc."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/LCA.2021.3081752"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/RTEICT42901.2018.9012507"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/SIST50301.2021.9465954"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"crossref","unstructured":"Yiping Kang Johann Hauswald Cao Gao Austin Rovinski Trevor Mudge Jason Mars and Lingjia Tang. Neurosurgeon: Collaborative Intelligence Between the Cloud and Mobile Edge. In Proceedings of the International Conference on Architectural Support for Programming Languages and Operating Systems page 615--629 2017.","DOI":"10.1145\/3093315.3037698"},{"key":"e_1_3_2_1_57_1","volume-title":"Scaling laws for neural language models. arXiv preprint arXiv:2001.08361","author":"Kaplan Jared","year":"2020","unstructured":"Jared Kaplan, Sam McCandlish, Tom Henighan, Tom B Brown, Benjamin Chess, Rewon Child, Scott Gray, Alec Radford, Jeffrey Wu, and Dario Amodei. Scaling laws for neural language models. arXiv preprint arXiv:2001.08361, 2020."},{"key":"e_1_3_2_1_58_1","first-page":"135","volume-title":"Ali R Butt. 
SHADE: Enable Fundamental Cacheability for Distributed Deep Learning Training. In Proceedings of the USENIX Conference on File and Storage Technologies","author":"Seraj Khan Redwan Ibne","year":"2023","unstructured":"Redwan Ibne Seraj Khan, Ahmad Hossein Yazdani, Yuqi Fu, Arnab K Paul, Bo Ji, Xun Jian, Yue Cheng, and Ali R Butt. SHADE: Enable Fundamental Cacheability for Distributed Deep Learning Training. In Proceedings of the USENIX Conference on File and Storage Technologies, pages 135--152, 2023."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/LCA.2019.2930590"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1611835114"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458864.3468220"},{"key":"e_1_3_2_1_62_1","volume-title":"Learning multiple layers of features from tiny images. https:\/\/www.cs.toronto.edu\/~kriz\/learning-features-2009-TR.pdf","author":"Krizhevsky Alex","year":"2009","unstructured":"Alex Krizhevsky. Learning multiple layers of features from tiny images. https:\/\/www.cs.toronto.edu\/~kriz\/learning-features-2009-TR.pdf, 2009."},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1145\/3513085"},{"key":"e_1_3_2_1_64_1","first-page":"583","volume-title":"Bor-Yiing Su. Scaling Distributed Machine Learning with the Parameter Server. In Proceedings of the USENIX Symposium on Operating Systems Design and Implementation","author":"Li Mu","year":"2014","unstructured":"Mu Li, David G. Andersen, Jun Woo Park, Alexander J. Smola, Amr Ahmed, Vanja Josifovski, James Long, Eugene J. Shekita, and Bor-Yiing Su. Scaling Distributed Machine Learning with the Parameter Server. 
In Proceedings of the USENIX Symposium on Operating Systems Design and Implementation, page 583--598, 2014."},{"key":"e_1_3_2_1_65_1","first-page":"31","article-title":"A decentralized pipelined SGD framework for distributed deep net training","author":"Li Youjie","year":"2018","unstructured":"Youjie Li, Mingchao Yu, Songze Li, Salman Avestimehr, Nam Sung Kim, and Alexander Schwing. Pipe-SGD: A decentralized pipelined SGD framework for distributed deep net training. Advances in Neural Information Processing Systems, 31, 2018.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_66_1","first-page":"395","volume-title":"Xiaowei Li. Cognitive SSD: A Deep Learning Engine for In-Storage Data Retrieval. In Proceedings of the USENIX Annual Technical Conference","author":"Liang Shengwen","year":"2019","unstructured":"Shengwen Liang, Ying Wang, Youyou Lu, Zhe Yang, Huawei Li, and Xiaowei Li. Cognitive SSD: A Deep Learning Engine for In-Storage Data Retrieval. In Proceedings of the USENIX Annual Technical Conference, pages 395--410, 2019."},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2011.2134078"},{"key":"e_1_3_2_1_68_1","first-page":"35","article-title":"Few-shot parameter-efficient fine-tuning is better and cheaper than in-context learning","author":"Liu Haokun","year":"2022","unstructured":"Haokun Liu, Derek Tam, Mohammed Muqeeth, Jay Mohta, Tenghao Huang, Mohit Bansal, and Colin A Raffel. Few-shot parameter-efficient fine-tuning is better and cheaper than in-context learning. Advances in Neural Information Processing Systems, 35, 2022.","journal-title":"Advances in Neural Information Processing Systems"},{"issue":"12","key":"e_1_3_2_1_69_1","first-page":"2346","article-title":"Learning under concept drift: A review","volume":"31","author":"Lu Jie","year":"2018","unstructured":"Jie Lu, Anjin Liu, Fan Dong, Feng Gu, Joao Gama, and Guangquan Zhang. Learning under concept drift: A review. 
IEEE Transactions on Knowledge and Data Engineering, 31(12):2346--2363, 2018.","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"key":"e_1_3_2_1_70_1","volume-title":"Advancing state-of-the-art image recognition with deep learning on hashtags. https:\/\/ai.facebook.com\/blog\/advancing-state-of-the-art-image-recognition-with-deep-learning-on-hashtags\/","author":"Mahajan Dhruv","year":"2018","unstructured":"Dhruv Mahajan, Ross Girshick, Vignesh Ramanathan, Manohar Paluri, and Laurens Maaten van der. Advancing state-of-the-art image recognition with deep learning on hashtags. https:\/\/ai.facebook.com\/blog\/advancing-state-of-the-art-image-recognition-with-deep-learning-on-hashtags\/, 2018."},{"key":"e_1_3_2_1_71_1","first-page":"77","article-title":"Data drift mitigation in machine learning for large-scale systems","volume":"4","author":"Mallick Ankur","year":"2022","unstructured":"Ankur Mallick, Kevin Hsieh, Behnaz Arzani, and Gauri Joshi. Matchmaker: Data drift mitigation in machine learning for large-scale systems. Proceedings of Machine Learning and Systems, 4:77--94, 2022.","journal-title":"Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_1_72_1","first-page":"77","article-title":"Data drift mitigation in machine learning for large-scale systems","volume":"4","author":"Mallick Ankur","year":"2022","unstructured":"Ankur Mallick, Kevin Hsieh, Behnaz Arzani, and Gauri Joshi. Matchmaker: Data drift mitigation in machine learning for large-scale systems. Proceedings of Machine Learning and Systems, 4:77--94, 2022.","journal-title":"Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_1_73_1","volume-title":"How Much Data Do We Create Every Day? https:\/\/bernardmarr.com\/how-much-data-do-we-create-every-day-the-mind-blowing-stats-everyone-should-read\/","author":"Marr Bernard","year":"2021","unstructured":"Bernard Marr. How Much Data Do We Create Every Day? 
https:\/\/bernardmarr.com\/how-much-data-do-we-create-every-day-the-mind-blowing-stats-everyone-should-read\/, 2021."},{"key":"e_1_3_2_1_74_1","volume-title":"Serving ML Model Pipelines on NVIDIA Triton Inference Server with Ensemble Models. https:\/\/www.tensorflow.org\/lite\/guide","author":"Radzihovsky Matthew","year":"2023","unstructured":"Matthew Radzihovsky and Farzan Memarian and Ethem Can and Burak Yoldemir. Serving ML Model Pipelines on NVIDIA Triton Inference Server with Ensemble Models. https:\/\/www.tensorflow.org\/lite\/guide, 2023."},{"key":"e_1_3_2_1_75_1","volume-title":"Sentiment analysis algorithms and applications: A survey. Ain Shams engineering journal, 5(4):1093--1113","author":"Medhat Walaa","year":"2014","unstructured":"Walaa Medhat, Ahmed Hassan, and Hoda Korashy. Sentiment analysis algorithms and applications: A survey. Ain Shams engineering journal, 5(4):1093--1113, 2014."},{"key":"e_1_3_2_1_76_1","unstructured":"Meta. How Facebook is using AI to improve photo descriptions for people who are blind or visually impaired. https:\/\/tech.facebook.com\/artificial-intelligence\/2021\/1\/how-facebook-is-using-ai-to-improve-photo-descriptions-for-people-who-are-blind-or-visually-impaired\/ 2021."},{"key":"e_1_3_2_1_77_1","volume-title":"Suggest content tags with NLP using deep learning. https:\/\/learn.microsoft.com\/en-us\/azure\/architecture\/solution-ideas\/articles\/website-content-tag-suggestion-with-deep-learning-and-nlp","author":"Microsoft Inc.","year":"2022","unstructured":"Microsoft Inc. Suggest content tags with NLP using deep learning. https:\/\/learn.microsoft.com\/en-us\/azure\/architecture\/solution-ideas\/articles\/website-content-tag-suggestion-with-deep-learning-and-nlp, 2022."},{"key":"e_1_3_2_1_78_1","first-page":"203","volume-title":"Fine-grained DNN Checkpointing. 
In 19th USENIX Conference on File and Storage Technologies","author":"Mohan Jayashree","year":"2021","unstructured":"Jayashree Mohan, Amar Phanishayee, and Vijay Chidambaram. CheckFreq: Frequent, Fine-grained DNN Checkpointing. In 19th USENIX Conference on File and Storage Technologies, pages 203--216, 2021."},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359646"},{"key":"e_1_3_2_1_80_1","volume-title":"The Story of Netflix. https:\/\/about.netflix.com\/en","author":"Netflix Inc.","year":"2024","unstructured":"Netflix Inc. The Story of Netflix. https:\/\/about.netflix.com\/en, 2024."},{"key":"e_1_3_2_1_81_1","volume-title":"NVIDIA Tesla V100. https:\/\/www.nvidia.com\/en-gb\/data-center\/tesla-v100\/","author":"NVIDIA.","year":"2017","unstructured":"NVIDIA. NVIDIA Tesla V100. https:\/\/www.nvidia.com\/en-gb\/data-center\/tesla-v100\/, 2017."},{"key":"e_1_3_2_1_82_1","volume-title":"NVIDIA T4 Tensor Core GPUs for Accelerating Inference. https:\/\/www.nvidia.com\/en-us\/data-center\/tesla-t4\/","author":"NVIDIA.","year":"2018","unstructured":"NVIDIA. NVIDIA T4 Tensor Core GPUs for Accelerating Inference. https:\/\/www.nvidia.com\/en-us\/data-center\/tesla-t4\/, 2018."},{"key":"e_1_3_2_1_83_1","volume-title":"https:\/\/developer.nvidia.com\/tensorrt","author":"Developer NVIDIA","year":"2021","unstructured":"NVIDIA Developer. NVIDIA TensorRT. https:\/\/developer.nvidia.com\/tensorrt, 2021."},{"key":"e_1_3_2_1_84_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01217"},{"key":"e_1_3_2_1_85_1","first-page":"307","volume-title":"Proceedings of the USENIX Annual Technical Conference","author":"Park Jay H","year":"2020","unstructured":"Jay H Park, Gyeongchan Yun, M Yi Chang, Nguyen T Nguyen, Seungmin Lee, Jaesik Choi, Sam H Noh, and Young ri Choi. Hetpipe: Enabling large DNN training on (whimpy) heterogeneous GPU clusters through integration of pipelined model parallelism and data parallelism. 
In Proceedings of the USENIX Annual Technical Conference, pages 307--321, 2020."},{"key":"e_1_3_2_1_86_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2008.09.002"},{"key":"e_1_3_2_1_87_1","volume-title":"International journal of multimedia and ubiquitous engineering, 8:385--396","author":"Tian Dong","year":"2013","unstructured":"Dong ping Tian. A review on image feature extraction and representation techniques. International journal of multimedia and ubiquitous engineering, 8:385--396, 2013."},{"key":"e_1_3_2_1_88_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICMLA.2015.160"},{"key":"e_1_3_2_1_89_1","first-page":"24","article-title":"A lock-free approach to parallelizing stochastic gradient descent","author":"Recht Benjamin","year":"2011","unstructured":"Benjamin Recht, Christopher Re, Stephen Wright, and Feng Niu. Hogwild!: A lock-free approach to parallelizing stochastic gradient descent. Advances in Neural Information Processing Systems, 24, 2011.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_90_1","first-page":"551","volume-title":"Yuxiong He. ZeRO-Offload: Democratizing Billion-Scale Model Training. In Proceedings of the USENIX Annual Technical Conference","author":"Ren Jie","year":"2021","unstructured":"Jie Ren, Samyam Rajbhandari, Yazdani Reza Aminabadi, Olatunji Ruwase, Shuangyan Yang, Minjia Zhang, Dong Li, and Yuxiong He. ZeRO-Offload: Democratizing Billion-Scale Model Training. In Proceedings of the USENIX Annual Technical Conference, pages 551--564, 2021."},{"key":"e_1_3_2_1_91_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"e_1_3_2_1_92_1","volume-title":"Horovod: fast and easy distributed deep learning in TensorFlow. arXiv preprint arXiv:1802.05799","author":"Sergeev Alexander","year":"2018","unstructured":"Alexander Sergeev and Mike Del Balso. Horovod: fast and easy distributed deep learning in TensorFlow. 
arXiv preprint arXiv:1802.05799, 2018."},{"key":"e_1_3_2_1_93_1","volume-title":"Deep Learning based Large Scale Visual Recommendation and Search for E-Commerce","author":"Shankar Devashish","year":"2017","unstructured":"Devashish Shankar, Sujay Narumanchi, H A Ananya, Pramod Kompalli, and Krishnendu Chaudhury. Deep Learning based Large Scale Visual Recommendation and Search for E-Commerce, 2017."},{"key":"e_1_3_2_1_94_1","first-page":"1056","volume-title":"Proceedings of IEEE International Symposium on High-Performance Computer Architecture","author":"Sun Xuan","year":"2022","unstructured":"Xuan Sun, Hu Wan, Qiao Li, Chia-Lin Yang, Tei-Wei Kuo, and Chun Xue. RM-SSD: In-Storage Computing for Large-Scale Recommendation Inference. In Proceedings of IEEE International Symposium on High-Performance Computer Architecture, pages 1056--1070, 2022."},{"key":"e_1_3_2_1_95_1","volume-title":"Storage SuperServer SSG-121E-NES24R. https:\/\/www.supermicro.com\/en\/products\/system\/storage\/1u\/ssg-121e-nes24r","author":"SuperMicro Computer Inc.","year":"2024","unstructured":"SuperMicro Computer Inc. Storage SuperServer SSG-121E-NES24R. https:\/\/www.supermicro.com\/en\/products\/system\/storage\/1u\/ssg-121e-nes24r, 2024."},{"key":"e_1_3_2_1_96_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.308"},{"key":"e_1_3_2_1_97_1","doi-asserted-by":"publisher","DOI":"10.4018\/978-1-60566-766-9.ch011"},{"key":"e_1_3_2_1_98_1","first-page":"513","volume-title":"Proceedings of the USENIX Symposium on Networked Systems Design and Implementation","author":"Uta Alexandru","year":"2020","unstructured":"Alexandru Uta, Alexandru Custura, Dmitry Duplyakin, Ivo Jimenez, Jan Rellermeyer, Carlos Maltzahn, Robert Ricci, and Alexandru Iosup. Is big data performance reproducible in modern cloud networks? 
In Proceedings of the USENIX Symposium on Networked Systems Design and Implementation, pages 513--527, 2020."},{"key":"e_1_3_2_1_99_1","volume-title":"Deep learning for retail product recognition: Challenges and techniques. Computational intelligence and neuroscience","author":"Wei Yuchen","year":"2020","unstructured":"Yuchen Wei, Son Tran, Shuxiang Xu, Byeong Kang, and Matthew Springer. Deep learning for retail product recognition: Challenges and techniques. Computational intelligence and neuroscience, 2020, 2020."},{"key":"e_1_3_2_1_100_1","first-page":"30","article-title":"TernGrad: Ternary Gradients to Reduce Communication in Distributed Deep Learning","author":"Wen Wei","year":"2017","unstructured":"Wei Wen, Cong Xu, Feng Yan, Chunpeng Wu, Yandan Wang, Yiran Chen, and Hai Li. TernGrad: Ternary Gradients to Reduce Communication in Distributed Deep Learning. Advances in Neural Information Processing Systems, 30, 2017.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_101_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.634"},{"key":"e_1_3_2_1_102_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-50341-3_21"},{"key":"e_1_3_2_1_103_1","volume-title":"Proceedings of the VLDB Endowment, 14(11)","author":"Yang Yifei","year":"2021","unstructured":"Yifei Yang, Matt Youill, Matthew Woicik, Yizhou Liu, Xiangyao Yu, Marco Serafini, Ashraf Aboulnaga, and Michael Stonebraker. Flex-pushdowndb: Hybrid pushdown and caching in a cloud dbms. Proceedings of the VLDB Endowment, 14(11), 2021."},{"key":"e_1_3_2_1_104_1","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2017.2712809"},{"key":"e_1_3_2_1_105_1","first-page":"27","article-title":"How transferable are features in deep neural networks?","author":"Yosinski Jason","year":"2014","unstructured":"Jason Yosinski, Jeff Clune, Yoshua Bengio, and Hod Lipson. How transferable are features in deep neural networks? 
Advances in Neural Information Processing Systems, 27, 2014.","journal-title":"Advances in Neural Information Processing Systems"},{"volume-title":"Ever wonder how YouTube works? https:\/\/www.youtube.com\/intl\/en_us\/howyoutubeworks\/","year":"2024","key":"e_1_3_2_1_106_1","unstructured":"Youtube. Ever wonder how YouTube works? https:\/\/www.youtube.com\/intl\/en_us\/howyoutubeworks\/, 2024."},{"key":"e_1_3_2_1_107_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00716"},{"key":"e_1_3_2_1_108_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10305"}],"event":{"name":"ASPLOS '24: 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 3","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture","SIGOPS ACM Special Interest Group on Operating Systems","SIGPLAN ACM Special Interest Group on Programming Languages","SIGBED ACM Special Interest Group on Embedded Systems"],"location":"La Jolla CA USA","acronym":"ASPLOS '24"},"container-title":["Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 3"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3620666.3651345","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:03:43Z","timestamp":1750291423000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3620666.3651345"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,27]]},"references-count":108,"alternative-id":["10.1145\/3620666.3651345","10.1145\/3620666"],"URL":"https:\/\/doi.org\/10.1145\/3620666.3651345","relation":{},"subject":[],"published":{"date-parts":[[2024,4,27]]},"assertion":[{"value":"2024-04-27","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}