{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T19:22:57Z","timestamp":1776885777048,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":91,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,6,18]],"date-time":"2023-06-18T00:00:00Z","timestamp":1687046400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CNS-1812407"],"award-info":[{"award-number":["CNS-1812407"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CNS-2029520"],"award-info":[{"award-number":["CNS-2029520"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["IIS-1956002"],"award-info":[{"award-number":["IIS-1956002"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["IIS-2205360"],"award-info":[{"award-number":["IIS-2205360"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CCF-2217003"],"award-info":[{"award-number":["CCF-2217003"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CCF-2215042"],"award-info":[{"award-number":["CCF-2215042"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,6,18]]},"DOI":"10.1145\/3581791.3596852","type":"proceedings-article","created":{"date-parts":[[2023,6,16]],"date-time":"2023-06-16T17:52:21Z","timestamp":1686937941000},"page":"56-69","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":25,"title":["ElasticTrainer: Speeding Up On-Device Training with Runtime Elastic Tensor Selection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2569-2309","authenticated-orcid":false,"given":"Kai","family":"Huang","sequence":"first","affiliation":[{"name":"Department of Electrical and Computer Engineering, University of Pittsburgh, Pittsburgh, PA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5517-5882","authenticated-orcid":false,"given":"Boyuan","family":"Yang","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, University of Pittsburgh, Pittsburgh, PA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2144-6960","authenticated-orcid":false,"given":"Wei","family":"Gao","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, University of Pittsburgh, Pittsburgh, PA, United States of America"}]}],"member":"320","published-online":{"date-parts":[[2023,6,18]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Chainer. https:\/\/github.com\/chainer\/chainer.  Chainer. 
https:\/\/github.com\/chainer\/chainer."},{"key":"e_1_3_2_1_2_1","unstructured":"Dynet. https:\/\/github.com\/clab\/dynet."},{"key":"e_1_3_2_1_3_1","unstructured":"htop. https:\/\/htop.dev\/."},{"key":"e_1_3_2_1_4_1","unstructured":"Jetbot. https:\/\/developer.nvidia.com\/embedded\/learn\/jetbot."},{"key":"e_1_3_2_1_5_1","unstructured":"jetson-stats. https:\/\/github.com\/rbonghi\/jetsonstats."},{"key":"e_1_3_2_1_6_1","unstructured":"Mnn. https:\/\/github.com\/alibaba\/MNN."},{"key":"e_1_3_2_1_7_1","unstructured":"Mxnet. https:\/\/github.com\/apache\/mxnet."},{"key":"e_1_3_2_1_8_1","unstructured":"Nvidia jetson tx2. https:\/\/developer.nvidia.com\/embedded\/jetson-tx2."},{"key":"e_1_3_2_1_9_1","unstructured":"Poniie power meter. https:\/\/poniie.com\/products\/6."},{"key":"e_1_3_2_1_10_1","unstructured":"Raspberry pi 4b. https:\/\/www.raspberrypi.com\/products\/raspberry-pi-4-model-b\/."},{"key":"e_1_3_2_1_11_1","unstructured":"Skydio 2. https:\/\/developer.nvidia.com\/blog\/skydio-2-jetson-tx2-drone\/."},{"key":"e_1_3_2_1_12_1","unstructured":"Tensorflow lite. https:\/\/www.tensorflow.org\/lite."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/2951913.2976746"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298911"},{"key":"e_1_3_2_1_15_1","volume-title":"Qsgd: Communication-efficient sgd via gradient quantization and encoding. Advances in neural information processing systems, 30","author":"Alistarh D.","year":"2017","unstructured":"D. Alistarh, D. Grubic, J. Li, R. Tomioka, and M. Vojnovic. Qsgd: Communication-efficient sgd via gradient quantization and encoding. Advances in neural information processing systems, 30, 2017."},{"key":"e_1_3_2_1_16_1","volume-title":"Backpropagation and stochastic gradient descent method. Neurocomputing, 5(4--5):185--196","author":"Amari S.-i.","year":"1993","unstructured":"S.-i. Amari. Backpropagation and stochastic gradient descent method. Neurocomputing, 5(4--5):185--196, 1993."},{"key":"e_1_3_2_1_17_1","first-page":"173","volume-title":"International conference on machine learning","author":"Amodei D.","year":"2016","unstructured":"D. Amodei, S. Ananthanarayanan, R. Anubhai, J. Bai, E. Battenberg, C. Case, J. Casper, B. Catanzaro, Q. Cheng, G. Chen, et al. 
Deep speech 2: End-to-end speech recognition in english and mandarin. In International conference on machine learning, pages 173--182. PMLR, 2016."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2017.2743240"},{"key":"e_1_3_2_1_19_1","volume-title":"Layer normalization. arXiv preprint arXiv:1607.06450","author":"Ba J. L.","year":"2016","unstructured":"J. L. Ba, J. R. Kiros, and G. E. Hinton. Layer normalization. arXiv preprint arXiv:1607.06450, 2016."},{"key":"e_1_3_2_1_20_1","volume-title":"Neural machine translation by jointly learning to align and translate. arXiv preprint arXiv:1409.0473","author":"Bahdanau D.","year":"2014","unstructured":"D. Bahdanau, K. Cho, and Y. Bengio. Neural machine translation by jointly learning to align and translate. arXiv preprint arXiv:1409.0473, 2014."},{"key":"e_1_3_2_1_21_1","volume-title":"Understanding dropout. Advances in neural information processing systems, 26","author":"Baldi P.","year":"2013","unstructured":"P. Baldi and P. J. Sadowski. Understanding dropout. Advances in neural information processing systems, 26, 2013."},{"key":"e_1_3_2_1_22_1","first-page":"1","article-title":"Automatic differentiation in machine learning: a survey","volume":"18","author":"Baydin A. G.","year":"2018","unstructured":"A. G. Baydin, B. A. Pearlmutter, A. A. Radul, and J. M. Siskind. Automatic differentiation in machine learning: a survey. Journal of Machine Learning Research, 18:1--43, 2018.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i11.21699"},{"key":"e_1_3_2_1_24_1","first-page":"17","volume-title":"Proceedings of ICML workshop on unsupervised and transfer learning","author":"Bengio Y.","year":"2012","unstructured":"Y. Bengio. Deep learning of representations for unsupervised and transfer learning. In Proceedings of ICML workshop on unsupervised and transfer learning, pages 17--36. JMLR Workshop and Conference Proceedings, 2012."},{"key":"e_1_3_2_1_25_1","first-page":"374","article-title":"Towards federated learning at scale: System design","volume":"1","author":"Bonawitz K.","year":"2019","unstructured":"K. Bonawitz, H. Eichner, W. Grieskamp, D. Huba, A. Ingerman, V. Ivanov, C. Kiddon, J. Kone\u010dn\u00fd, S. Mazzocchi, B. McMahan, et al. Towards federated learning at scale: System design. 
Proceedings of Machine Learning and Systems, 1:374--388, 2019.","journal-title":"Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1023\/a:1010933404324"},{"key":"e_1_3_2_1_27_1","volume-title":"Language models are few-shot learners. Advances in neural information processing systems, 33:1877--1901","author":"Brown T.","year":"2020","unstructured":"T. Brown, B. Mann, N. Ryder, M. Subbiah, J. D. Kaplan, P. Dhariwal, A. Neelakantan, P. Shyam, G. Sastry, A. Askell, et al. Language models are few-shot learners. Advances in neural information processing systems, 33:1877--1901, 2020."},{"key":"e_1_3_2_1_28_1","volume-title":"Tinytl: Reduce activations, not trainable parameters for efficient on-device learning. arXiv preprint arXiv:2007.11622","author":"Cai H.","year":"2020","unstructured":"H. Cai, C. Gan, L. Zhu, and S. Han. Tinytl: Reduce activations, not trainable parameters for efficient on-device learning. arXiv preprint arXiv:2007.11622, 2020."},{"key":"e_1_3_2_1_29_1","volume-title":"Return of the devil in the details: Delving deep into convolutional nets. arXiv preprint arXiv:1405.3531","author":"Chatfield K.","year":"2014","unstructured":"K. Chatfield, K. Simonyan, A. Vedaldi, and A. Zisserman. Return of the devil in the details: Delving deep into convolutional nets. arXiv preprint arXiv:1405.3531, 2014."},{"key":"e_1_3_2_1_30_1","volume-title":"Net2net: Accelerating learning via knowledge transfer. arXiv preprint arXiv:1511.05641","author":"Chen T.","year":"2015","unstructured":"T. Chen, I. Goodfellow, and J. Shlens. Net2net: Accelerating learning via knowledge transfer. arXiv preprint arXiv:1511.05641, 2015."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2211477"},{"key":"e_1_3_2_1_33_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin J.","year":"2018","unstructured":"J. Devlin, M.-W. Chang, K. Lee, and K. Toutanova. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805, 2018."},{"key":"e_1_3_2_1_34_1","first-page":"647","volume-title":"International conference on machine learning","author":"Donahue J.","year":"2014","unstructured":"J. Donahue, Y. Jia, O. Vinyals, J. Hoffman, N. Zhang, E. Tzeng, and T. Darrell. Decaf: A deep convolutional activation feature for generic visual recognition. In International conference on machine learning, pages 647--655. 
PMLR, 2014."},{"key":"e_1_3_2_1_35_1","volume-title":"An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929","author":"Dosovitskiy A.","year":"2020","unstructured":"A. Dosovitskiy, L. Beyer, A. Kolesnikov, D. Weissenborn, X. Zhai, T. Unterthiner, M. Dehghani, M. Minderer, G. Heigold, S. Gelly, et al. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929, 2020."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-009-0275-4"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00915"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3498361.3539765"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00162"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5824"},{"key":"e_1_3_2_1_41_1","volume-title":"Learning both weights and connections for efficient neural network. Advances in neural information processing systems, 28","author":"Han S.","year":"2015","unstructured":"S. Han, J. Pool, J. Tran, and W. Dally. Learning both weights and connections for efficient neural network. Advances in neural information processing systems, 28, 2015."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.155"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1016\/B978-0-12-741252-8.50010-8"},{"key":"e_1_3_2_1_45_1","volume-title":"Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685","author":"Hu E. J.","year":"2021","unstructured":"E. J. Hu, Y. Shen, P. Wallis, Z. Allen-Zhu, Y. Li, S. Wang, L. Wang, and W. Chen. Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685, 2021."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2015.58"},{"key":"e_1_3_2_1_47_1","first-page":"448","volume-title":"International conference on machine learning","author":"Ioffe S.","year":"2015","unstructured":"S. Ioffe and C. Szegedy. Batch normalization: Accelerating deep network training by reducing internal covariate shift. In International conference on machine learning, pages 448--456. 
PMLR, 2015."},{"key":"e_1_3_2_1_48_1","volume-title":"Continuously constructive deep neural networks","author":"Irsoy O.","year":"2019","unstructured":"O. Irsoy and E. Alpayd\u0131n. Continuously constructive deep neural networks. IEEE transactions on neural networks and learning systems, 31(4):1124--1133, 2019."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3498361.3538948"},{"key":"e_1_3_2_1_50_1","article-title":"Model pruning enables efficient federated learning on edge devices","author":"Jiang Y.","year":"2022","unstructured":"Y. Jiang, S. Wang, V. Valls, B. J. Ko, W.-H. Lee, K. K. Leung, and L. Tassiulas. Model pruning enables efficient federated learning on edge devices. IEEE Transactions on Neural Networks and Learning Systems, 2022.","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP46576.2022.9898051"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11651"},{"key":"e_1_3_2_1_53_1","volume-title":"Proc. CVPR workshop on fine-grained visual categorization (FGVC)","volume":"2","author":"Khosla A.","year":"2011","unstructured":"A. Khosla, N. Jayadevaprakash, B. Yao, and F.-F. Li. Novel dataset for fine-grained image categorization: Stanford dogs. In Proc. CVPR workshop on fine-grained visual categorization (FGVC), volume 2. Citeseer, 2011."},{"key":"e_1_3_2_1_54_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma D. P.","year":"2014","unstructured":"D. P. Kingma and J. Ba. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980, 2014."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1611835114"},{"key":"e_1_3_2_1_56_1","volume-title":"Learning multiple layers of features from tiny images","author":"Krizhevsky A.","year":"2009","unstructured":"A. Krizhevsky, G. Hinton, et al. Learning multiple layers of features from tiny images. 2009."},{"key":"e_1_3_2_1_57_1","volume-title":"Rma: Rapid motor adaptation for legged robots. arXiv preprint arXiv:2107.04034","author":"Kumar A.","year":"2021","unstructured":"A. Kumar, Z. Fu, D. Pathak, and J. Malik. Rma: Rapid motor adaptation for legged robots. 
arXiv preprint arXiv:2107.04034, 2021."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447993.3483278"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413960"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3495243.3517017"},{"key":"e_1_3_2_1_61_1","volume-title":"Pruning filters for efficient convnets. arXiv preprint arXiv:1608.08710","author":"Li H.","year":"2016","unstructured":"H. Li, A. Kadav, I. Durdanovic, H. Samet, and H. P. Graf. Pruning filters for efficient convnets. arXiv preprint arXiv:1608.08710, 2016."},{"key":"e_1_3_2_1_62_1","volume-title":"On-device training under 256kb memory. arXiv preprint arXiv:2206.15472","author":"Lin J.","year":"2022","unstructured":"J. Lin, L. Zhu, W.-M. Chen, W.-C. Wang, C. Gan, and S. Han. On-device training under 256kb memory. arXiv preprint arXiv:2206.15472, 2022."},{"key":"e_1_3_2_1_63_1","volume-title":"Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101","author":"Loshchilov I.","year":"2017","unstructured":"I. Loshchilov and F. Hutter. Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101, 2017."},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356156"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01152"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1109\/DSW.2019.8755604"},{"key":"e_1_3_2_1_67_1","volume-title":"K for the price of 1: Parameter-efficient multi-task and transfer learning. arXiv preprint arXiv:1810.10703","author":"Mudrakarta P. K.","year":"2018","unstructured":"P. K. Mudrakarta, M. Sandler, A. Zhmoginov, and A. Howard. K for the price of 1: Parameter-efficient multi-task and transfer learning. arXiv preprint arXiv:1810.10703, 2018."},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6248092"},{"key":"e_1_3_2_1_69_1","volume-title":"et al. Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems, 32","author":"Paszke A.","year":"2019","unstructured":"A. Paszke, S. Gross, F. Massa, A. Lerer, J. Bradbury, G. Chanan, T. Killeen, Z. Lin, N. Gimelshein, L. Antiga, et al. Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems, 32, 2019."},{"key":"e_1_3_2_1_70_1","volume-title":"Split learning for collaborative deep learning in healthcare. arXiv preprint arXiv:1912.12115","author":"Poirot M. G.","year":"2019","unstructured":"M. G. Poirot, P. Vepakomma, K. Chang, J. Kalpathy-Cramer, R. Gupta, and R. Raskar. 
Split learning for collaborative deep learning in healthcare. arXiv preprint arXiv:1912.12115, 2019."},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00474"},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.74"},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2014.131"},{"key":"e_1_3_2_1_74_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan K.","year":"2014","unstructured":"K. Simonyan and A. Zisserman. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556, 2014."},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9812166"},{"key":"e_1_3_2_1_76_1","first-page":"3299","volume-title":"International Conference on Machine Learning","author":"Sun X.","year":"2017","unstructured":"X. Sun, X. Ren, S. Ma, and H. Wang. meprop: Sparsified back propagation for accelerated deep learning with reduced overfitting. In International Conference on Machine Learning, pages 3299--3308. PMLR, 2017."},{"key":"e_1_3_2_1_77_1","first-page":"3319","volume-title":"International conference on machine learning","author":"Sundararajan M.","year":"2017","unstructured":"M. Sundararajan, A. Taly, and Q. Yan. Axiomatic attribution for deep networks. In International conference on machine learning, pages 3319--3328. PMLR, 2017."},{"issue":"2","key":"e_1_3_2_1_78_1","doi-asserted-by":"crossref","first-page":"261","DOI":"10.1109\/TPDS.2008.78","article-title":"Improving performance of dynamic programming via parallelism and locality on multicore architectures","volume":"20","author":"Tan G.","year":"2008","unstructured":"G. Tan, N. Sun, and G. R. Gao. Improving performance of dynamic programming via parallelism and locality on multicore architectures. IEEE Transactions on Parallel and Distributed Systems, 20(2):261--274, 2008.","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"key":"e_1_3_2_1_79_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995347"},{"key":"e_1_3_2_1_80_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01258"},{"key":"e_1_3_2_1_81_1","volume-title":"Attention is all you need. Advances in neural information processing systems, 30","author":"Vaswani A.","year":"2017","unstructured":"A. 
Vaswani, N. Shazeer, N. Parmar, J. Uszkoreit, L. Jones, A. N. Gomez, \u0141. Kaiser, and I. Polosukhin. Attention is all you need. Advances in neural information processing systems, 30, 2017."},{"key":"e_1_3_2_1_82_1","volume-title":"Split learning for health: Distributed deep learning without sharing raw patient data. arXiv preprint arXiv:1812.00564","author":"Vepakomma P.","year":"2018","unstructured":"P. Vepakomma, O. Gupta, T. Swedish, and R. Raskar. Split learning for health: Distributed deep learning without sharing raw patient data. arXiv preprint arXiv:1812.00564, 2018."},{"key":"e_1_3_2_1_84_1","doi-asserted-by":"publisher","DOI":"10.1109\/CAMAN.2011.5778742"},{"key":"e_1_3_2_1_85_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01261-8_1"},{"key":"e_1_3_2_1_86_1","doi-asserted-by":"publisher","DOI":"10.1145\/3495243.3560545"},{"key":"e_1_3_2_1_87_1","doi-asserted-by":"publisher","DOI":"10.1145\/3287075"},{"key":"e_1_3_2_1_88_1","first-page":"937","volume-title":"2018 USENIX Annual Technical Conference (USENIX ATC 18)","author":"Xu S.","year":"2018","unstructured":"S. Xu, H. Zhang, G. Neubig, W. Dai, J. K. Kim, Z. Deng, Q. Ho, G. Yang, and E. P. Xing. Cavs: An efficient runtime system for dynamic neural networks. In 2018 USENIX Annual Technical Conference (USENIX ATC 18), pages 937--950, 2018."},{"key":"e_1_3_2_1_89_1","volume-title":"Lifelong learning with dynamically expandable networks. arXiv preprint arXiv:1708.01547","author":"Yoon J.","year":"2017","unstructured":"J. Yoon, E. Yang, J. Lee, and S. J. Hwang. Lifelong learning with dynamically expandable networks. arXiv preprint arXiv:1708.01547, 2017."},{"key":"e_1_3_2_1_90_1","volume-title":"Bitfit: Simple parameter-efficient fine-tuning for transformer-based masked language-models. arXiv preprint arXiv:2106.10199","author":"Zaken E. B.","year":"2021","unstructured":"E. B. Zaken, S. Ravfogel, and Y. Goldberg. Bitfit: Simple parameter-efficient fine-tuning for transformer-based masked language-models. 
arXiv preprint arXiv:2106.10199, 2021."},{"key":"e_1_3_2_1_91_1","doi-asserted-by":"publisher","DOI":"10.1145\/3384419.3430716"},{"key":"e_1_3_2_1_92_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.11"}],"event":{"name":"MobiSys '23: 21st Annual International Conference on Mobile Systems, Applications and Services","location":"Helsinki Finland","acronym":"MobiSys '23","sponsor":["SIGMOBILE ACM Special Interest Group on Mobility of Systems, Users, Data and Computing","SIGOPS ACM Special Interest Group on Operating Systems"]},"container-title":["Proceedings of the 21st Annual International Conference on Mobile Systems, Applications and Services"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581791.3596852","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/abs\/10.1145\/3581791.3596852","content-type":"text\/html","content-version":"vor","intended-application":"syndication"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:36:31Z","timestamp":1750178191000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581791.3596852"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,18]]},"references-count":91,"alternative-id":["10.1145\/3581791.3596852","10.1145\/3581791"],"URL":"https:\/\/doi.org\/10.1145\/3581791.3596852","relation":{},"subject":[],"published":{"date-parts":[[2023,6,18]]},"assertion":[{"value":"2023-06-18","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}