{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T10:04:49Z","timestamp":1775815489814,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":93,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,4,30]],"date-time":"2023-04-30T00:00:00Z","timestamp":1682812800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100000923","name":"Australian Research Council","doi-asserted-by":"publisher","award":["DP180100212, DP200102491"],"award-info":[{"award-number":["DP180100212, DP200102491"]}],"id":[{"id":"10.13039\/501100000923","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,4,30]]},"DOI":"10.1145\/3543507.3583540","type":"proceedings-article","created":{"date-parts":[[2023,4,26]],"date-time":"2023-04-26T23:30:25Z","timestamp":1682551825000},"page":"3142-3153","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":11,"title":["EdgeMove: Pipelining Device-Edge Model Training for Mobile Intelligence"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8496-7224","authenticated-orcid":false,"given":"Zeqian","family":"Dong","sequence":"first","affiliation":[{"name":"Swinburne University of Technology, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2607-4556","authenticated-orcid":false,"given":"Qiang","family":"He","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, China and Swinburne University of Technology, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5455-3792","authenticated-orcid":false,"given":"Feifei","family":"Chen","sequence":"additional","affiliation":[{"name":"Deakin University, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3934-7605","authenticated-orcid":false,"given":"Hai","family":"Jin","sequence":"additional","affiliation":[{"name":"Huazhong University of Science and Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1350-6639","authenticated-orcid":false,"given":"Tao","family":"Gu","sequence":"additional","affiliation":[{"name":"Macquarie University, Australia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7868-5471","authenticated-orcid":false,"given":"Yun","family":"Yang","sequence":"additional","affiliation":[{"name":"Swinburne University of Technology, Australia"}]}],"member":"320","published-online":{"date-parts":[[2023,4,30]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"Meta AI. 2021. PyTorch Mobile. https:\/\/pytorch.org\/mobile"},{"key":"e_1_3_2_2_2_1","volume-title":"19th USENIX Symposium on Networked Systems Design and Implementation. 119\u2013135","author":"Bhardwaj Romil","year":"2022","unstructured":"Romil Bhardwaj, Zhengxu Xia, Ganesh Ananthanarayanan, Junchen Jiang, Yuanchao Shu, Nikolaos Karianakis, Kevin Hsieh, Paramvir Bahl, and Ion Stoica. 2022. Ekya: Continuous learning of video analytics models on edge compute servers. In 19th USENIX Symposium on Networked Systems Design and Implementation. 119\u2013135."},{"key":"e_1_3_2_2_3_1","first-page":"22593","article-title":"Distributed distillation for on-device learning","volume":"33","author":"Bistritz Ilai","year":"2020","unstructured":"Ilai Bistritz, Ariana Mann, and Nicholas Bambos. 2020. Distributed distillation for on-device learning. Advances in Neural Information Processing Systems 33 (2020), 22593\u201322604.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_4_1","first-page":"374","article-title":"Towards federated learning at scale: System design","volume":"1","author":"Bonawitz Keith","year":"2019","unstructured":"Keith Bonawitz, Hubert Eichner, Wolfgang Grieskamp, Dzmitry Huba, Alex Ingerman, Vladimir Ivanov, Chloe Kiddon, Jakub Kone\u010dn\u1ef3, Stefano Mazzocchi, Brendan McMahan, 2019. Towards federated learning at scale: System design. Proceedings of Machine Learning and Systems 1 (2019), 374\u2013388.","journal-title":"Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-7908-2604-3_16"},{"key":"e_1_3_2_2_6_1","first-page":"11285","article-title":"Tinytl: Reduce memory, not parameters for efficient on-device learning","volume":"33","author":"Cai Han","year":"2020","unstructured":"Han Cai, Chuang Gan, Ligeng Zhu, and Song Han. 2020. Tinytl: Reduce memory, not parameters for efficient on-device learning. Advances in Neural Information Processing Systems 33 (2020), 11285\u201311297.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_7_1","volume-title":"Audio adversarial examples: Targeted attacks on speech-to-text","author":"Carlini Nicholas","unstructured":"Nicholas Carlini and David Wagner. 2018. Audio adversarial examples: Targeted attacks on speech-to-text. In IEEE security and privacy workshops. IEEE, 1\u20137."},{"key":"e_1_3_2_2_8_1","volume-title":"19th USENIX Symposium on Networked Systems Design and Implementation. 35\u201350","author":"Chen Jun\u00a0Lin","year":"2022","unstructured":"Jun\u00a0Lin Chen, Daniyal Liaqat, Moshe Gabel, and Eyal de Lara. 2022. Starlight: Fast Container Provisioning on the Edge and over the { WAN}. In 19th USENIX Symposium on Networked Systems Design and Implementation. 35\u201350."},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2020.3045436"},{"key":"e_1_3_2_2_10_1","volume-title":"International Conference on Machine Learning. PMLR, 2285\u20132294","author":"Chen Wenlin","year":"2015","unstructured":"Wenlin Chen, James Wilson, Stephen Tyree, Kilian Weinberger, and Yixin Chen. 2015. Compressing neural networks with the hashing trick. In International Conference on Machine Learning. PMLR, 2285\u20132294."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2015.2487344"},{"key":"e_1_3_2_2_12_1","volume-title":"CINIC-10 is not imagenet or CIFAR-10. arXiv preprint arXiv:1810.03505","author":"Darlow N","year":"2018","unstructured":"Luke\u00a0N Darlow, Elliot\u00a0J Crowley, Antreas Antoniou, and Amos\u00a0J Storkey. 2018. CINIC-10 is not imagenet or CIFAR-10. arXiv preprint arXiv:1810.03505 (2018)."},{"key":"e_1_3_2_2_13_1","first-page":"1223","article-title":"Large scale distributed deep networks","volume":"25","author":"Dean Jeffrey","year":"2012","unstructured":"Jeffrey Dean, Greg Corrado, Rajat Monga, Kai Chen, Matthieu Devin, Mark Mao, Marc\u2019aurelio Ranzato, Andrew Senior, Paul Tucker, Ke Yang, 2012. Large scale distributed deep networks. Advances in Neural Information Processing Systems 25 (2012), 1223\u20131231.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2021.3137380"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2019.2947893"},{"key":"e_1_3_2_2_16_1","volume-title":"Proceedings of the thirteenth international conference on artificial intelligence and statistics. JMLR Workshop and Conference Proceedings, 249\u2013256","author":"Glorot Xavier","year":"2010","unstructured":"Xavier Glorot and Yoshua Bengio. 2010. Understanding the difficulty of training deep feedforward neural networks. In Proceedings of the thirteenth international conference on artificial intelligence and statistics. JMLR Workshop and Conference Proceedings, 249\u2013256."},{"key":"e_1_3_2_2_17_1","unstructured":"Google. 2017. TensorFlow Lite. https:\/\/www.tensorflow.org\/lite"},{"key":"e_1_3_2_2_18_1","unstructured":"Google. 2022. Use Battery Saver on a Pixel phone. https:\/\/support.google.com\/pixelphone\/answer\/6187458"},{"key":"e_1_3_2_2_19_1","unstructured":"Google. 2022. What is Pub\/Sub?https:\/\/cloud.google.com\/pubsub\/docs\/overview"},{"key":"e_1_3_2_2_20_1","volume-title":"18th USENIX Symposium on Networked Systems Design and Implementation. 705\u2013719","author":"Guo Peizhen","year":"2021","unstructured":"Peizhen Guo, Bo Hu, and Wenjun Hu. 2021. Mistify: Automating DNN Model Porting for On-Device Inference at the Edge. In 18th USENIX Symposium on Networked Systems Design and Implementation. 705\u2013719."},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2020.3046440"},{"key":"e_1_3_2_2_22_1","volume-title":"Edge computing in 5G for drone navigation: what to offload?IEEE Robotics and Automation Letters 6, 2","author":"Hayat Samira","year":"2021","unstructured":"Samira Hayat, Roland Jung, Hermann Hellwagner, Christian Bettstetter, Driton Emini, and Dominik Schnieders. 2021. Edge computing in 5G for drone navigation: what to offload?IEEE Robotics and Automation Letters 6, 2 (2021), 2571\u20132578."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2019.2938944"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6207"},{"key":"e_1_3_2_2_26_1","first-page":"19365","article-title":"Self-adaptive training: beyond empirical risk minimization","volume":"33","author":"Huang Lang","year":"2020","unstructured":"Lang Huang, Chao Zhang, and Hongyang Zhang. 2020. Self-adaptive training: beyond empirical risk minimization. Advances in Neural Information Processing Systems 33 (2020), 19365\u201319376.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_27_1","volume-title":"Gpipe: Efficient training of giant neural networks using pipeline parallelism. Advances in Neural Information Processing Systems 32","author":"Huang Yanping","year":"2019","unstructured":"Yanping Huang, Youlong Cheng, Ankur Bapna, Orhan Firat, Dehao Chen, Mia Chen, HyoukJoong Lee, Jiquan Ngiam, Quoc\u00a0V Le, Yonghui Wu, 2019. Gpipe: Efficient training of giant neural networks using pipeline parallelism. Advances in Neural Information Processing Systems 32 (2019)."},{"key":"e_1_3_2_2_28_1","volume-title":"Related\u2019s Hudson Yards: Smart City or Surveillance City?The Real Deal 15","author":"Jeans David","year":"2019","unstructured":"David Jeans. 2019. Related\u2019s Hudson Yards: Smart City or Surveillance City?The Real Deal 15 (2019)."},{"key":"e_1_3_2_2_29_1","volume-title":"Exploring Hidden Dimensions in Parallelizing Convolutional Neural Networks. In 28 International Conference on Machine Learning. 2279\u20132288","author":"Jia Zhihao","year":"2018","unstructured":"Zhihao Jia, Sina Lin, Charles\u00a0R Qi, and Alex Aiken. 2018. Exploring Hidden Dimensions in Parallelizing Convolutional Neural Networks. In 28 International Conference on Machine Learning. 2279\u20132288."},{"key":"e_1_3_2_2_30_1","volume-title":"Beyond Data and Model Parallelism for Deep Neural Networks.Proceedings of Machine Learning and Systems 1","author":"Jia Zhihao","year":"2019","unstructured":"Zhihao Jia, Matei Zaharia, and Alex Aiken. 2019. Beyond Data and Model Parallelism for Deep Neural Networks.Proceedings of Machine Learning and Systems 1 (2019), 1\u201313."},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3035918.3035933"},{"key":"e_1_3_2_2_32_1","unstructured":"Avi Kewalramani. 2020. Live Video Analytics with Microsoft Rocket for reducing edge compute costs. https:\/\/techcommunity.microsoft.com\/t5\/internet-of-things-blog\/live-video-analytics-with-microsoft-rocket-for-reducing-edge\/ba-p\/1522305"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00090"},{"key":"e_1_3_2_2_34_1","volume-title":"Flexpoint: An adaptive numerical format for efficient training of deep neural networks. Advances in Neural Information Processing Systems 30","author":"K\u00f6ster Urs","year":"2017","unstructured":"Urs K\u00f6ster, Tristan Webb, Xin Wang, Marcel Nassar, Arjun\u00a0K Bansal, William Constable, Oguz Elibol, Scott Gray, Stewart Hall, Luke Hornof, 2017. Flexpoint: An adaptive numerical format for efficient training of deep neural networks. Advances in Neural Information Processing Systems 30 (2017)."},{"key":"e_1_3_2_2_35_1","unstructured":"Alex Krizhevsky Geoffrey Hinton 2009. Learning multiple layers of features from tiny images. (2009)."},{"key":"e_1_3_2_2_36_1","volume-title":"Optimal Edge User Allocation in Edge Computing with Variable Sized Vector Bin Packing. In International Conference on Service-Oriented Computing. Springer, 230\u2013245","author":"Lai Phu","year":"2018","unstructured":"Phu Lai, Qiang He, Mohamed Abdelrazek, Feifei Chen, John Hosking, John Grundy, and Yun Yang. 2018. Optimal Edge User Allocation in Edge Computing with Variable Sized Vector Bin Packing. In International Conference on Service-Oriented Computing. Springer, 230\u2013245."},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2020.3043755"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539104"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/HOST49136.2021.9702279"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/2640087.2644155"},{"key":"e_1_3_2_2_41_1","volume-title":"32nd International Conference on Neural Information Processing Systems. 8056\u20138067","author":"Li Youjie","year":"2018","unstructured":"Youjie Li, Mingchao Yu, Songze Li, Salman Avestimehr, Nam\u00a0Sung Kim, and Alexander Schwing. 2018. Pipe-SGD: a decentralized pipelined SGD framework for distributed deep net training. In 32nd International Conference on Neural Information Processing Systems. 8056\u20138067."},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/IWQoS54832.2022.9812887"},{"key":"e_1_3_2_2_43_1","volume-title":"International Conference on Machine Learning. PMLR, 3043\u20133052","author":"Lian Xiangru","year":"2018","unstructured":"Xiangru Lian, Wei Zhang, Ce Zhang, and Ji Liu. 2018. Asynchronous decentralized parallel stochastic gradient descent. In International Conference on Machine Learning. PMLR, 3043\u20133052."},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001164"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSC.2017.2662008"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3300061.3300116"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.298"},{"key":"e_1_3_2_2_48_1","unstructured":"Reiner Ludwig. 2022. Who cares about latency in 5G?https:\/\/www.ericsson.com\/en\/blog\/2022\/8\/who-cares-about-latency-in-5g"},{"key":"e_1_3_2_2_49_1","volume-title":"14th USENIX Symposium on Operating Systems Design and Implementation. 937\u2013954","author":"Mai Luo","year":"2020","unstructured":"Luo Mai, Guo Li, Marcel Wagenl\u00e4nder, Konstantinos Fertakis, Andrei-Octavian Brabete, and Peter Pietzuch. 2020. { KungFu} : Making Training in Distributed Machine Learning Adaptive. In 14th USENIX Symposium on Operating Systems Design and Implementation. 937\u2013954."},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2021.3085527"},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2017.2745201"},{"key":"e_1_3_2_2_52_1","unstructured":"Microsoft. 2022. Pub\/Sub messaging. https:\/\/aws.amazon.com\/pub-sub-messaging\/"},{"key":"e_1_3_2_2_53_1","unstructured":"Microsoft. 2022. What is Azure web PubSub service?https:\/\/learn.microsoft.com\/en-us\/azure\/azure-web-pubsub\/overview"},{"key":"e_1_3_2_2_54_1","volume-title":"5th International Conference on Learning Representations.","author":"Molchanov P","year":"2019","unstructured":"P Molchanov, S Tyree, T Karras, T Aila, and J Kautz. 2019. Pruning convolutional neural networks for resource efficient inference. In 5th International Conference on Learning Representations."},{"key":"e_1_3_2_2_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359646"},{"key":"e_1_3_2_2_56_1","volume-title":"International Conference on Machine Learning. PMLR, 7937\u20137947","author":"Narayanan Deepak","year":"2021","unstructured":"Deepak Narayanan, Amar Phanishayee, Kaiyu Shi, Xie Chen, and Matei Zaharia. 2021. Memory-efficient pipeline-parallel dnn training. In International Conference on Machine Learning. PMLR, 7937\u20137947."},{"key":"e_1_3_2_2_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476209"},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/SP.2019.00065"},{"key":"e_1_3_2_2_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298640"},{"key":"e_1_3_2_2_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/DAC18074.2021.9586259"},{"key":"e_1_3_2_2_61_1","doi-asserted-by":"publisher","DOI":"10.1145\/3485447.3512153"},{"key":"e_1_3_2_2_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2018.2869954"},{"key":"e_1_3_2_2_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM48880.2022.9796705"},{"key":"e_1_3_2_2_64_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2019.2941458"},{"key":"e_1_3_2_2_65_1","volume-title":"USENIX Annual Technical Conference. 307\u2013321","author":"Park H","year":"2020","unstructured":"Jay\u00a0H Park, Gyeongchan Yun, M\u00a0Yi Chang, Nguyen\u00a0T Nguyen, Seungmin Lee, Jaesik Choi, Sam\u00a0H Noh, and Young-ri Choi. 2020. HetPipe: Enabling Large DNN Training on (Whimpy) Heterogeneous GPU Clusters through Integration of Pipelined Model Parallelism and Data Parallelism. In USENIX Annual Technical Conference. 307\u2013321."},{"key":"e_1_3_2_2_66_1","doi-asserted-by":"publisher","DOI":"10.1145\/3485447.3512146"},{"key":"e_1_3_2_2_67_1","volume-title":"2nd USENIX Workshop on Hot Topics in Edge Computing.","author":"Rausch Thomas","year":"2019","unstructured":"Thomas Rausch, Waldemar Hummer, Vinod Muthusamy, Alexander Rashed, and Schahram Dustdar. 2019. Towards a serverless platform for edge { AI}. In 2nd USENIX Workshop on Hot Topics in Edge Computing."},{"key":"e_1_3_2_2_68_1","volume-title":"Multi-tenant Edge Clouds. In USENIX Annual Technical Conference. 927\u2013942","author":"Ren Yuxin","year":"2020","unstructured":"Yuxin Ren, Guyue Liu, Vlad Nitu, Wenyuan Shao, Riley Kennedy, Gabriel Parmer, Timothy Wood, and Alain Tchana. 2020. { Fine-Grained} Isolation for Scalable, Dynamic, Multi-tenant Edge Clouds. In USENIX Annual Technical Conference. 927\u2013942."},{"key":"e_1_3_2_2_69_1","doi-asserted-by":"publisher","DOI":"10.1145\/3472883.3487008"},{"key":"e_1_3_2_2_70_1","doi-asserted-by":"publisher","DOI":"10.1109\/COMST.2020.3007787"},{"key":"e_1_3_2_2_71_1","volume-title":"22nd ACM SIGSAC Conference on Computer and Communications Security. 1310\u20131321","author":"Shokri Reza","year":"2015","unstructured":"Reza Shokri and Vitaly Shmatikov. 2015. Privacy-preserving deep learning. In 22nd ACM SIGSAC Conference on Computer and Communications Security. 1310\u20131321."},{"key":"e_1_3_2_2_72_1","volume-title":"3rd International Conference on Learning Representations","author":"Simonyan Karen","year":"2015","unstructured":"Karen Simonyan and Andrew Zisserman. 2015. Very deep convolutional networks for large-scale image recognition. 3rd International Conference on Learning Representations (2015)."},{"key":"e_1_3_2_2_73_1","doi-asserted-by":"publisher","DOI":"10.1145\/3372297.3417270"},{"key":"e_1_3_2_2_74_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i8.20825"},{"key":"e_1_3_2_2_75_1","volume-title":"Online Learning for Rate-Adaptive Task Offloading Under Latency Constraints in Serverless Edge Computing","author":"T\u00fct\u00fcnc\u00fco\u011flu Feridun","year":"2022","unstructured":"Feridun T\u00fct\u00fcnc\u00fco\u011flu, Sla\u0111ana Jo\u0161ilo, and Gy\u00f6rgy D\u00e1n. 2022. Online Learning for Rate-Adaptive Task Offloading Under Latency Constraints in Serverless Edge Computing. IEEE\/ACM Transactions on Networking (2022)."},{"key":"e_1_3_2_2_76_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2018.8486411"},{"key":"e_1_3_2_2_77_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2018.2867520"},{"key":"e_1_3_2_2_78_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNET.2019.2916577"},{"key":"e_1_3_2_2_79_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM48880.2022.9796961"},{"key":"e_1_3_2_2_80_1","volume-title":"Enabling Efficient Large-Scale Deep Learning Training with Cache Coherent Disaggregated Memory Systems. In IEEE International Symposium on High-Performance Computer Architecture (HPCA). IEEE, 126\u2013140","author":"Wang Zixuan","year":"2022","unstructured":"Zixuan Wang, Joonseop Sim, Euicheol Lim, and Jishen Zhao. 2022. Enabling Efficient Large-Scale Deep Learning Training with Cache Coherent Disaggregated Memory Systems. In IEEE International Symposium on High-Performance Computer Architecture (HPCA). IEEE, 126\u2013140."},{"key":"e_1_3_2_2_81_1","volume-title":"Speech commands: A dataset for limited-vocabulary speech recognition. arXiv preprint arXiv:1804.03209","author":"Warden Pete","year":"2018","unstructured":"Pete Warden. 2018. Speech commands: A dataset for limited-vocabulary speech recognition. arXiv preprint arXiv:1804.03209 (2018)."},{"key":"e_1_3_2_2_82_1","volume-title":"The marginal value of adaptive gradient methods in machine learning. Advances in Neural Information Processing Systems 30","author":"Wilson C","year":"2017","unstructured":"Ashia\u00a0C Wilson, Rebecca Roelofs, Mitchell Stern, Nati Srebro, and Benjamin Recht. 2017. The marginal value of adaptive gradient methods in machine learning. Advances in Neural Information Processing Systems 30 (2017)."},{"key":"e_1_3_2_2_83_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2019.00048"},{"key":"e_1_3_2_2_84_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2020.3016344"},{"key":"e_1_3_2_2_85_1","volume-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 12277\u201312286","author":"Yang Li","year":"2022","unstructured":"Li Yang, Adnan\u00a0Siraj Rakin, and Deliang Fan. 2022. Rep-Net: Efficient On-Device Learning via Feature Reprogramming. In IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 12277\u201312286."},{"key":"e_1_3_2_2_86_1","volume-title":"Group-based Interleaved Pipeline Parallelism for Large-scale DNN Training. In International Conference on Learning Representations.","author":"Yang PengCheng","year":"2021","unstructured":"PengCheng Yang, Xiaoming Zhang, Wenpeng Zhang, Ming Yang, and Hong Wei. 2021. Group-based Interleaved Pipeline Parallelism for Large-scale DNN Training. In International Conference on Learning Representations."},{"key":"e_1_3_2_2_87_1","doi-asserted-by":"publisher","DOI":"10.1145\/3494981"},{"key":"e_1_3_2_2_88_1","first-page":"20838","article-title":"MEST: Accurate and fast memory-economic sparse training framework on the edge","volume":"34","author":"Yuan Geng","year":"2021","unstructured":"Geng Yuan, Xiaolong Ma, Wei Niu, Zhengang Li, Zhenglun Kong, Ning Liu, Yifan Gong, Zheng Zhan, Chaoyang He, Qing Jin, 2021. MEST: Accurate and fast memory-economic sparse training framework on the edge. Advances in Neural Information Processing Systems 34 (2021), 20838\u201320850.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_89_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3449994"},{"key":"e_1_3_2_2_90_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3450051"},{"key":"e_1_3_2_2_91_1","volume-title":"Character-level convolutional networks for text classification. Advances in neural information processing systems 28","author":"Zhang Xiang","year":"2015","unstructured":"Xiang Zhang, Junbo Zhao, and Yann LeCun. 2015. Character-level convolutional networks for text classification. Advances in neural information processing systems 28 (2015)."},{"key":"e_1_3_2_2_92_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00716"},{"key":"e_1_3_2_2_93_1","volume-title":"Where to go next: A spatio-temporal gated network for next poi recommendation","author":"Zhao Pengpeng","year":"2020","unstructured":"Pengpeng Zhao, Anjing Luo, Yanchi Liu, Fuzhen Zhuang, Jiajie Xu, Zhixu Li, Victor\u00a0S Sheng, and Xiaofang Zhou. 2020. Where to go next: A spatio-temporal gated network for next poi recommendation. IEEE Transactions on Knowledge and Data Engineering (2020)."}],"event":{"name":"WWW '23: The ACM Web Conference 2023","location":"Austin TX USA","acronym":"WWW '23","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the ACM Web Conference 2023"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3543507.3583540","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3543507.3583540","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:47:03Z","timestamp":1750178823000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3543507.3583540"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,4,30]]},"references-count":93,"alternative-id":["10.1145\/3543507.3583540","10.1145\/3543507"],"URL":"https:\/\/doi.org\/10.1145\/3543507.3583540","relation":{},"subject":[],"published":{"date-parts":[[2023,4,30]]},"assertion":[{"value":"2023-04-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}