{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T07:58:52Z","timestamp":1780473532631,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":68,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,6,3]],"date-time":"2024-06-03T00:00:00Z","timestamp":1717372800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Research Grants Council of Hong Kong","award":["GRF (CityU 11202623)"],"award-info":[{"award-number":["GRF (CityU 11202623)"]}]},{"DOI":"10.13039\/100007567","name":"City University of Hong Kong","doi-asserted-by":"publisher","award":["APRC (9610633)"],"award-info":[{"award-number":["APRC (9610633)"]}],"id":[{"id":"10.13039\/100007567","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,6,3]]},"DOI":"10.1145\/3643832.3661878","type":"proceedings-article","created":{"date-parts":[[2024,6,4]],"date-time":"2024-06-04T17:14:23Z","timestamp":1717521263000},"page":"465-478","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":22,"title":["Pantheon: Preemptible Multi-DNN Inference on Mobile Edge GPUs"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5350-6046","authenticated-orcid":false,"given":"Lixiang","family":"Han","sequence":"first","affiliation":[{"name":"City University of Hong Kong, Hong Kong SAR, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5457-6967","authenticated-orcid":false,"given":"Zimu","family":"Zhou","sequence":"additional","affiliation":[{"name":"City University of Hong Kong, Hong Kong SAR, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3296-3392","authenticated-orcid":false,"given":"Zhenjiang","family":"Li","sequence":"additional","affiliation":[{"name":"City University of Hong Kong, Hong Kong SAR, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,6,4]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"https:\/\/www.tensorflow.org\/","year":"2023","unstructured":"Tensorflow. https:\/\/www.tensorflow.org\/, 2023."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/RTSS.2017.00017"},{"key":"e_1_3_2_1_3_1","volume-title":"https:\/\/developer.arm.com\/documentation\/den0024\/a","author":"ARM.","year":"2015","unstructured":"ARM. Arm\u00ae cortex\u00ae-a series programmer's guide for armv8-a. https:\/\/developer.arm.com\/documentation\/den0024\/a, 2015."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1016\/0967-0661(93)92105-D"},{"key":"e_1_3_2_1_5_1","volume-title":"Proc. of USENIX ATC","author":"Bateni Soroush","year":"2020","unstructured":"Soroush Bateni and Cong Liu. Neuos: A latency-predictable multi-dimensional optimization framework for dnn-driven autonomous systems. In Proc. of USENIX ATC, 2020."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3560905.3568544"},{"key":"e_1_3_2_1_7_1","volume-title":"Proc. of USENIX USDI","author":"Crankshaw Daniel","year":"2017","unstructured":"Daniel Crankshaw, Xin Wang, Guilio Zhou, Michael J Franklin, Joseph E Gonzalez, and Ion Stoica. Clipper: A low-latency online prediction serving system. In Proc. of USENIX USDI, 2017."},{"key":"e_1_3_2_1_8_1","volume-title":"Proc. of ACM BuildSys","author":"Ding Xianzhong","year":"2019","unstructured":"Xianzhong Ding, Wan Du, and Alberto Cerpa. Octopus: Deep reinforcement learning for holistic smart building control. In Proc. of ACM BuildSys, 2019."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ROBIO.2017.8324578"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIFS.2014.2359646"},{"key":"e_1_3_2_1_11_1","volume-title":"Proc. of ACM MobiCom","author":"Fang Biyi","year":"2018","unstructured":"Biyi Fang, Xiao Zeng, and Mi Zhang. Nestdnn: Resource-aware multi-tenant on-device deep learning for continuous mobile vision. In Proc. of ACM MobiCom, 2018."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2005.16"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/IV47402.2020.9304627"},{"key":"e_1_3_2_1_14_1","volume-title":"Proc. of ACM MobiCom","author":"Georgiev Petko","year":"2016","unstructured":"Petko Georgiev, Nicholas D Lane, Kiran K Rachuri, and Cecilia Mascolo. Leo: Scheduling sensor inference algorithms across heterogeneous mobile processors and network resources. In Proc. of ACM MobiCom, 2016."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-42051-1_16"},{"key":"e_1_3_2_1_16_1","volume-title":"Proc. of USENIX OSDI","author":"Han Mingcong","year":"2022","unstructured":"Mingcong Han, Hanze Zhang, Rong Chen, and Haibo Chen. Microsecond-scale preemption for concurrent GPU-accelerated DNN inferences. In Proc. of USENIX OSDI, 2022."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/PACT52795.2021.00014"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2019.00021"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447993.3483249"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2013.6706807"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM.2019.8737614"},{"key":"e_1_3_2_1_24_1","volume-title":"Proc. of ICLR","author":"Huang Gao","year":"2018","unstructured":"Gao Huang, Danlu Chen, Tianhong Li, Felix Wu, Laurens Van Der Maaten, and Kilian Q Weinberger. Multi-scale dense networks for resource efficient image classification. In Proc. of ICLR, 2018."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3495243.3560551"},{"key":"e_1_3_2_1_26_1","volume-title":"Gpu market size & share analysis - growth trends & forecasts (2023 -","author":"Intelligence Modor","year":"2028","unstructured":"Modor Intelligence. Gpu market size & share analysis - growth trends & forecasts (2023 - 2028). https:\/\/www.mordorintelligence.com\/industry-reports\/graphics-processing-unit-market, 2023."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581791.3596845"},{"key":"e_1_3_2_1_29_1","volume-title":"Proc. of ACM MobiSys","author":"Jeong Joo Seong","year":"2022","unstructured":"Joo Seong Jeong, Jingyu Lee, Donghyun Kim, Changmin Jeon, Changjin Jeong, Youngki Lee, and Byung-Gon Chun. Band: coordinated multi-dnn inference on heterogeneous mobile processors. In Proc. of ACM MobiSys, 2022."},{"key":"e_1_3_2_1_30_1","volume-title":"Proc. of ACM MobiSys","author":"Jia Fucheng","year":"2022","unstructured":"Fucheng Jia, Deyu Zhang, Ting Cao, Shiqi Jiang, Yunxin Liu, Ju Ren, and Yaoxue Zhang. Codl: efficient cpu-gpu co-execution for deep learning inference on mobile devices. In Proc. of ACM MobiSys, 2022."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447993.3483274"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/RTSS52674.2021.00038"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPSN.2016.7460664"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3400302.3415698"},{"key":"e_1_3_2_1_35_1","volume-title":"Proc. of ACM EMDL","author":"Lee Jingyu","year":"2021","unstructured":"Jingyu Lee, Yunxin Liu, and Youngki Lee. Parallelfusion: towards maximum utilization of mobile gpu for dnn inference. In Proc. of ACM EMDL, 2021."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/REAL.1989.63567"},{"key":"e_1_3_2_1_37_1","volume-title":"Proc. of USENIX OSDI","author":"Li Zhuohan","year":"2023","unstructured":"Zhuohan Li, Lianmin Zheng, Yinmin Zhong, Vincent Liu, Ying Sheng, Xin Jin, Yanping Huang, Zhifeng Chen, Hao Zhang, Joseph E Gonzalez, et al. Alpaserve: Statistical multiplexing with model parallelism for deep learning serving. In Proc. of USENIX OSDI, 2023."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3570361.3592524"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173162.3173191"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3560905.3567772"},{"key":"e_1_3_2_1_41_1","volume-title":"Proc. of ACM SenSys","author":"Ling Neiwen","year":"2022","unstructured":"Neiwen Ling, Xuan Huang, Zhihe Zhao, Nan Guan, Zhenyu Yan, and Guoliang Xing. Blastnet: Exploiting duo-blocks for cross-processor real-time dnn inference. In Proc. of ACM SenSys, 2022."},{"key":"e_1_3_2_1_42_1","volume-title":"Proc. of ACM SenSys","author":"Ling Neiwen","year":"2021","unstructured":"Neiwen Ling, Kai Wang, Yuze He, Guoliang Xing, and Daqi Xie. Rt-mdl: Supporting real-time mixed deep learning tasks on edge platforms. In Proc. of ACM SenSys, 2021."},{"key":"e_1_3_2_1_43_1","volume-title":"Pearson Education India","author":"Jane","year":"2006","unstructured":"Jane WS Liu et al. Real-time systems. Pearson Education India, 2006."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3300061.3300116"},{"key":"e_1_3_2_1_45_1","volume-title":"Proceeding of the ACM on IMWUT","author":"Liu Sicong","year":"2021","unstructured":"Sicong Liu, Bin Guo, Ke Ma, Zhiwen Yu, and Junzhao Du. Adaspring: Context-adaptive and runtime-evolutionary deep model compression for mobile applications. Proceeding of the ACM on IMWUT, 2021."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3210240.3210337"},{"key":"e_1_3_2_1_47_1","volume-title":"Performance tuning guide - use onednn graph with torchscript for inference. https:\/\/pytorch.org\/tutorials\/recipes\/recipes\/tuning_guide.html#use-onednn-graph-with-torchscript-for-inference","author":"Migacz Szymon","year":"2023","unstructured":"Szymon Migacz. Performance tuning guide - use onednn graph with torchscript for inference. https:\/\/pytorch.org\/tutorials\/recipes\/recipes\/tuning_guide.html#use-onednn-graph-with-torchscript-for-inference, 2023."},{"key":"e_1_3_2_1_48_1","volume-title":"Oregon wildlife. https:\/\/www.kaggle.com\/datasets\/virtualdvid\/oregon-wildlife","author":"Molina David","year":"2018","unstructured":"David Molina. Oregon wildlife. https:\/\/www.kaggle.com\/datasets\/virtualdvid\/oregon-wildlife, 2018."},{"key":"e_1_3_2_1_49_1","volume-title":"Cuda runtime api. https:\/\/docs.nvidia.com\/cuda\/cuda-runtime-api\/index.html","author":"NVIDIA.","year":"2023","unstructured":"NVIDIA. Cuda runtime api. https:\/\/docs.nvidia.com\/cuda\/cuda-runtime-api\/index.html, 2023."},{"key":"e_1_3_2_1_50_1","volume-title":"Nvidia jetson. https:\/\/www.nvidia.com\/en-us\/autonomous-machines\/embedded-systems\/","author":"NVIDIA.","year":"2023","unstructured":"NVIDIA. Nvidia jetson. https:\/\/www.nvidia.com\/en-us\/autonomous-machines\/embedded-systems\/, 2023."},{"key":"e_1_3_2_1_51_1","volume-title":"Tuning cuda applications for volta. https:\/\/docs.nvidia.com\/cuda\/volta-tuning-guide\/index.html","author":"NVIDIA.","year":"2023","unstructured":"NVIDIA. Tuning cuda applications for volta. https:\/\/docs.nvidia.com\/cuda\/volta-tuning-guide\/index.html, 2023."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jag.2021.102456"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3495243.3560519"},{"key":"e_1_3_2_1_54_1","volume-title":"et al. Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, et al. Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems, 2019."},{"key":"e_1_3_2_1_55_1","volume-title":"Wildfire smoke dataset. https:\/\/public.roboflow.com\/object-detection\/wildfire-smoke","year":"2020","unstructured":"roboflow. Wildfire smoke dataset. https:\/\/public.roboflow.com\/object-detection\/wildfire-smoke, 2020."},{"key":"e_1_3_2_1_56_1","volume-title":"Proc. of IEEE CVPR","author":"Sandler Mark","year":"2018","unstructured":"Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, and Liang-Chieh Chen. Mobilenetv2: Inverted residuals and linear bottlenecks. In Proc. of IEEE CVPR, 2018."},{"key":"e_1_3_2_1_57_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556, 2014."},{"key":"e_1_3_2_1_58_1","volume-title":"Operating systems: internals and design principles","author":"Stallings William","year":"2011","unstructured":"William Stallings. Operating systems: internals and design principles. Prentice Hall Press, 2011."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2017.2761740"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2016.7900006"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447993.3448625"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM41043.2020.9155402"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1109\/SECON55815.2022.9918563"},{"key":"e_1_3_2_1_65_1","volume-title":"https:\/\/en.wikipedia.org\/wiki\/Preemption_(computing)","author":"Preemption","year":"2023","unstructured":"Wikipedia. Preemption (computing). https:\/\/en.wikipedia.org\/wiki\/Preemption_(computing), 2023."},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1109\/RTSS46320.2019.00042"},{"key":"e_1_3_2_1_67_1","volume-title":"Proc. of ACM SenSys","author":"Xu Huatao","year":"2021","unstructured":"Huatao Xu, Pengfei Zhou, Rui Tan, Mo Li, and Guobin Shen. Limu-bert: Unleashing the potential of unlabeled data for imu sensing applications. In Proc. of ACM SenSys, 2021."},{"key":"e_1_3_2_1_68_1","volume-title":"Proc. of ACM MobiCom","author":"Yi Juheon","year":"2020","unstructured":"Juheon Yi, Sunghyun Choi, and Youngki Lee. Eagleeye: Wearable camera-based person identification in crowded urban spaces. In Proc. of ACM MobiCom, 2020."},{"key":"e_1_3_2_1_69_1","volume-title":"Proc. of ACM MobiCom","author":"Yi Juheon","year":"2020","unstructured":"Juheon Yi and Youngki Lee. Heimdall: mobile gpu coordination platform for augmented reality applications. In Proc. of ACM MobiCom, 2020."},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1145\/3625687.3625789"}],"event":{"name":"MOBISYS '24: 22nd Annual International Conference on Mobile Systems, Applications and Services","location":"Minato-ku, Tokyo Japan","acronym":"MOBISYS '24","sponsor":["SIGMOBILE ACM Special Interest Group on Mobility of Systems, Users, Data and Computing","SIGOPS ACM Special Interest Group on Operating Systems"]},"container-title":["Proceedings of the 22nd Annual International Conference on Mobile Systems, Applications and Services"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3643832.3661878","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3643832.3661878","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:03:07Z","timestamp":1750291387000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3643832.3661878"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,3]]},"references-count":68,"alternative-id":["10.1145\/3643832.3661878","10.1145\/3643832"],"URL":"https:\/\/doi.org\/10.1145\/3643832.3661878","relation":{},"subject":[],"published":{"date-parts":[[2024,6,3]]},"assertion":[{"value":"2024-06-04","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}