{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T07:03:45Z","timestamp":1780988625875,"version":"3.54.1"},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T00:00:00Z","timestamp":1743465600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T00:00:00Z","timestamp":1743465600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62172031"],"award-info":[{"award-number":["62172031"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62172031"],"award-info":[{"award-number":["62172031"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62172031"],"award-info":[{"award-number":["62172031"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62172031"],"award-info":[{"award-number":["62172031"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62172031"],"award-info":[{"award-number":["62172031"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004826","name":"Natural Science Foundation of Beijing Municipality","doi-asserted-by":"publisher","award":["L191019"],"award-info":[{"award-number":["L191019"]}],"id":[{"id":"10.13039\/501100004826","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004826","name":"Natural Science Foundation of Beijing Municipality","doi-asserted-by":"publisher","award":["L191019"],"award-info":[{"award-number":["L191019"]}],"id":[{"id":"10.13039\/501100004826","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004826","name":"Natural Science Foundation of Beijing Municipality","doi-asserted-by":"publisher","award":["L191019"],"award-info":[{"award-number":["L191019"]}],"id":[{"id":"10.13039\/501100004826","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004826","name":"Natural Science Foundation of Beijing Municipality","doi-asserted-by":"publisher","award":["L191019"],"award-info":[{"award-number":["L191019"]}],"id":[{"id":"10.13039\/501100004826","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004826","name":"Natural Science Foundation of Beijing Municipality","doi-asserted-by":"publisher","award":["L191019"],"award-info":[{"award-number":["L191019"]}],"id":[{"id":"10.13039\/501100004826","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Real-Time Image Proc"],"published-print":{"date-parts":[[2025,4]]},"DOI":"10.1007\/s11554-025-01670-6","type":"journal-article","created":{"date-parts":[[2025,4,11]],"date-time":"2025-04-11T16:30:28Z","timestamp":1744389028000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Efficient NPU\u2013GPU scheduling for real-time deep learning inference on mobile devices"],"prefix":"10.1007","volume":"22","author":[{"given":"Chengwu","family":"Yu","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Meng","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Shan","family":"Chen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Wanqi","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Weiwei","family":"Fang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yanming","family":"Chen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Neal","family":"N.Xiong","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,4,11]]},"reference":[{"issue":"3","key":"1670_CR1","doi-asserted-by":"publisher","first-page":"334","DOI":"10.1109\/JPROC.2022.3153408","volume":"110","author":"T Zhao","year":"2022","unstructured":"Zhao, T., Xie, Y., Wang, Y., Cheng, J., Guo, X., Hu, B., et al.: A survey of deep learning on mobile devices: applications, optimizations, challenges, and research opportunities. Proceed IEEE. 110(3), 334\u2013354 (2022)","journal-title":"Proceed IEEE."},{"issue":"1","key":"1670_CR2","doi-asserted-by":"publisher","first-page":"42","DOI":"10.1109\/JPROC.2022.3226481","volume":"111","author":"MMH Shuvo","year":"2022","unstructured":"Shuvo, M.M.H., Islam, S.K., Cheng, J., Morshed, B.I.: Efficient acceleration of deep learning inference on resource-constrained edge devices: a review. Proceedings of the IEEE. 111(1), 42\u201391 (2022)","journal-title":"Proceedings of the IEEE."},{"issue":"03","key":"1670_CR3","first-page":"1433","volume":"22","author":"T Tan","year":"2023","unstructured":"Tan, T., Cao, G.: Deep learning video analytics through Edge computing and neural processing units on mobile devices. IEEE Trans. Mobile Comp. 22(03), 1433\u20131448 (2023)","journal-title":"IEEE Trans. Mobile Comp."},{"issue":"2","key":"1670_CR4","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1007\/s11554-024-01435-7","volume":"21","author":"V Sharma","year":"2024","unstructured":"Sharma, V., Sharma, A., Saini, S.: Real-time attention-based embedded LSTM for dynamic sign language recognition on edge devices. J. Real-Time Image Process. 21(2), 53 (2024)","journal-title":"J. Real-Time Image Process."},{"issue":"12","key":"1670_CR5","doi-asserted-by":"publisher","first-page":"3383","DOI":"10.1109\/TC.2023.3299030","volume":"72","author":"J Choi","year":"2023","unstructured":"Choi, J., Ha, Y., Lee, J., Lee, S., Lee, J., Jang, H., et al.: Enabling fine-grained spatial multitasking on systolic-array NPUs using dataflow mirroring. IEEE Transact. Comput. 72(12), 3383\u20133398 (2023)","journal-title":"IEEE Transact. Comput."},{"key":"1670_CR6","doi-asserted-by":"crossref","unstructured":"Jung, H., Ji, H., Pushchin, A., Ostapenko, M., Niu, W., Palachev, I., et\u00a0al.: Accelerating deep neural Networks on mobile multicore NPUs. In: Proceedings of the 21st ACM\/IEEE International Symposium on Code Generation and Optimization. p. 236\u2013248 (2023)","DOI":"10.1145\/3579990.3580015"},{"key":"1670_CR7","doi-asserted-by":"crossref","unstructured":"Cabrera, A., Hitefield, S., Kim, J., Lee, S., Miniskar, N.R., Vetter, J.S.: Toward performance portable programming for heterogeneous systems on a chip: a case study with qualcomm snapdragon soc. In: IEEE High performance extreme computing conference (HPEC). IEEE 2021, 1\u20137 (2021)","DOI":"10.1109\/HPEC49654.2021.9622794"},{"issue":"11","key":"1670_CR8","doi-asserted-by":"publisher","first-page":"7436","DOI":"10.1109\/TPAMI.2021.3117837","volume":"44","author":"Y Han","year":"2021","unstructured":"Han, Y., Huang, G., Song, S., Yang, L., Wang, H., Wang, Y.: Dynamic neural networks: a survey. IEEE Transact. Patter. Analy. Machine Intell. 44(11), 7436\u20137456 (2021)","journal-title":"IEEE Transact. Patter. Analy. Machine Intell."},{"key":"1670_CR9","unstructured":"Han, S., Pool, J., Tran, J., Dally, W.: Learning both weights and connections for efficient neural network. Advances in neural information processing systems. 28 (2015)"},{"key":"1670_CR10","unstructured":"Han, S., Mao, H., Dally, W. J.: Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding. arXiv preprint arXiv:1510.00149. (2015)"},{"key":"1670_CR11","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2021.107636","volume":"110","author":"M Liu","year":"2021","unstructured":"Liu, M., Fang, W., Ma, X., Xu, W., Xiong, N., Ding, Y.: Channel pruning guided by spatial and channel attention for DNNs in intelligent edge computing. App. Soft Comp. 110, 107636 (2021)","journal-title":"App. Soft Comp."},{"key":"1670_CR12","doi-asserted-by":"crossref","unstructured":"Ma, X., Fang, W.: Deep reinforcement learning based multi-task automated channel pruning for DNNs. In: 2023 International Joint Conference on Neural Networks (IJCNN). IEEE; p. 1\u20139 (2023)","DOI":"10.1109\/IJCNN54540.2023.10191092"},{"key":"1670_CR13","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2020.107281","volume":"105","author":"H Qin","year":"2020","unstructured":"Qin, H., Gong, R., Liu, X., Bai, X., Song, J., Sebe, N.: Binary neural networks: a survey. Patter. Recog. 105, 107281 (2020)","journal-title":"Patter. Recog."},{"key":"1670_CR14","doi-asserted-by":"crossref","unstructured":"Alemdar, H., Leroy, V., Prost-Boucle, A., P\u00e9trot, F.: Ternary neural networks for resource-efficient AI applications. In: international joint conference on neural networks (IJCNN). IEEE 2017, 2547\u20132554 (2017)","DOI":"10.1109\/IJCNN.2017.7966166"},{"key":"1670_CR15","unstructured":"Wu, X., Li, C., Aminabadi, R. Y., Yao, Z., He, Y.: Understanding int4 quantization for language models: latency speedup, composability, and failure cases. In: International Conference on Machine Learning. PMLR. p. 37524\u201337539 (2023)"},{"key":"1670_CR16","first-page":"3483","volume":"35","author":"K Zhao","year":"2021","unstructured":"Zhao, K., Huang, S., Pan, P., Li, Y., Zhang, Y., Gu, Z., et al.: Distribution adaptive int8 quantization for training cnns. In: Proceedings of the AAAI Conference on Artificial Intelligence. 35, 3483\u20133491 (2021)","journal-title":"In: Proceedings of the AAAI Conference on Artificial Intelligence."},{"key":"1670_CR17","first-page":"87","volume":"6","author":"J Lin","year":"2024","unstructured":"Lin, J., Tang, J., Tang, H., Yang, S., Chen, W.M., Wang, W.C., et al.: AWQ: activation-aware weight quantization for on-device llm compression and acceleration. Proceedings of Machine Learning and Systems. 6, 87\u2013100 (2024)","journal-title":"Proceedings of Machine Learning and Systems."},{"key":"1670_CR18","unstructured":"Hinton, G.: Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531. (2015)"},{"key":"1670_CR19","doi-asserted-by":"crossref","unstructured":"Teerapittayanon, S., McDanel, B., Kung, H.T.: Branchynet: Fast inference via early exiting from deep neural networks. In: 23rd international conference on pattern recognition (ICPR). IEEE 2016, 2464\u20132469 (2016)","DOI":"10.1109\/ICPR.2016.7900006"},{"issue":"5","key":"1670_CR20","doi-asserted-by":"publisher","first-page":"954","DOI":"10.1007\/s12559-020-09734-4","volume":"12","author":"S Scardapane","year":"2020","unstructured":"Scardapane, S., Scarpiniti, M., Baccarelli, E., Uncini, A.: Why should we add early exits to neural networks? Cognit. Comput. 12(5), 954\u2013966 (2020)","journal-title":"Cognit. Comput."},{"issue":"2","key":"1670_CR21","doi-asserted-by":"publisher","first-page":"2842","DOI":"10.1109\/JIOT.2023.3293506","volume":"11","author":"Q Wang","year":"2024","unstructured":"Wang, Q., Fang, W., Xiong, N.N.: TLEE: temporal-wise and layer-wise early exiting network for efficient video recognition on edge devices. IEEE Internet of Things Journal. 11(2), 2842\u20132854 (2024)","journal-title":"IEEE Internet of Things Journal."},{"key":"1670_CR22","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2023.126690","volume":"558","author":"X Gao","year":"2023","unstructured":"Gao, X., Liu, Y., Huang, T., Hou, Z.: PF-BERxiT: Early exiting for BERT with parameter-efficient fine-tuning and flexible early exiting strategy. Neurocomputing. 558, 126690 (2023)","journal-title":"Neurocomputing."},{"key":"1670_CR23","doi-asserted-by":"crossref","unstructured":"Huynh, L. N., Lee, Y., Balan, R. K.: Deepmon: Mobile gpu-based deep learning framework for continuous vision applications. In: Proceedings of the 15th Annual International Conference on Mobile Systems, Applications, and Services. p. 82\u201395 (2017)","DOI":"10.1145\/3081333.3081360"},{"key":"1670_CR24","doi-asserted-by":"crossref","unstructured":"Kim, Y., Kim, J., Chae, D., Kim, D., Kim, J.: $$\\mu$$layer: Low latency on-device inference using cooperative single-layer acceleration and processor-friendly quantization. In: Proceedings of the Fourteenth EuroSys Conference 2019. p. 1\u201315 (2019)","DOI":"10.1145\/3302424.3303950"},{"issue":"3","key":"1670_CR25","first-page":"1275","volume":"22","author":"Z Xu","year":"2021","unstructured":"Xu, Z., Yang, D., Yin, C., Tang, J., Wang, Y., Xue, G.: A co-scheduling framework for DNN models on mobile and edge devices with heterogeneous hardware. IEEE Transactions on Mobile Computing. 22(3), 1275\u20131288 (2021)","journal-title":"IEEE Transactions on Mobile Computing."},{"key":"1670_CR26","doi-asserted-by":"crossref","unstructured":"Jia, F., Zhang, D., Cao, T., Jiang, S., Liu, Y., Ren, J., et\u00a0al.: CoDL: efficient CPU-GPU co-execution for deep learning inference on mobile devices. In: Proceedings of the 20th Annual International Conference on Mobile Systems, Applications and Services. p. 209\u2013221 (2022)","DOI":"10.1145\/3498361.3538932"},{"key":"1670_CR27","doi-asserted-by":"crossref","unstructured":"Lin, X., Liu, R., Xie, J., Wei, Q., Zhou, Z., Chen, X., et\u00a0al.: Online Scheduling of CPU-NPU Co-inference for Edge AI Tasks. In: 2023 IEEE Wireless Communications and Networking Conference (WCNC). p. 1\u20136 (2023)","DOI":"10.1109\/WCNC55385.2023.10118755"},{"issue":"6","key":"1670_CR28","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1145\/3065386","volume":"60","author":"A Krizhevsky","year":"2017","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: ImageNet classification with deep convolutional neural networks. Communications of the ACM. 60(6), 84\u201390 (2017)","journal-title":"Communications of the ACM."},{"key":"1670_CR29","doi-asserted-by":"crossref","unstructured":"Redmon, J., Divvala, S., Girshick, R., Farhadi, A.: You only look once: Unified, real-time object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition. p. 779\u2013788 (2016)","DOI":"10.1109\/CVPR.2016.91"},{"key":"1670_CR30","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvcir.2023.103830","volume":"93","author":"K Tong","year":"2023","unstructured":"Tong, K., Wu, Y.: Rethinking PASCAL-VOC and MS-COCO dataset for small object detection. Journal of Visual Communication and Image Representation. 93, 103830 (2023)","journal-title":"Journal of Visual Communication and Image Representation."},{"key":"1670_CR31","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"1670_CR32","doi-asserted-by":"crossref","unstructured":"Hadidi, R., Cao, J., Xie, Y., Asgari, B., Krishna, T., Kim, H.: Characterizing the deployment of deep neural networks on commercial edge devices. In: IEEE International Symposium on Workload Characterization (IISWC). IEEE 2019, 35\u201348 (2019)","DOI":"10.1109\/IISWC47752.2019.9041955"},{"key":"1670_CR33","doi-asserted-by":"publisher","first-page":"679","DOI":"10.1109\/TPAMI.1986.4767851","volume":"6","author":"J Canny","year":"1986","unstructured":"Canny, J.: A computational approach to edge detection. IEEE Transactions on pattern analysis and machine intelligence. 6, 679\u2013698 (1986)","journal-title":"IEEE Transactions on pattern analysis and machine intelligence."},{"issue":"2","key":"1670_CR34","doi-asserted-by":"publisher","first-page":"352","DOI":"10.1109\/TPAMI.2017.2670560","volume":"40","author":"YG Jiang","year":"2018","unstructured":"Jiang, Y.G., Wu, Z., Wang, J., Xue, X., Chang, S.F.: Exploiting feature and class relationships in video categorization with regularized deep neural networks. IEEE Transactions on Pattern Analysis and Machine Intelligence. 40(2), 352\u2013364 (2018)","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence."},{"key":"1670_CR35","doi-asserted-by":"crossref","unstructured":"Kellerer, H., Pferschy, U., Pisinger, D., Kellerer, H., Pferschy, U., Pisinger, D.: Introduction to NP-Completeness of knapsack problems. Knapsack problems. 483\u2013493, (2004)","DOI":"10.1007\/978-3-540-24777-7_16"},{"issue":"3","key":"1670_CR36","doi-asserted-by":"publisher","first-page":"643","DOI":"10.1006\/jmaa.1994.1324","volume":"186","author":"NG Sancho","year":"1994","unstructured":"Sancho, N.G.: Shortest path problems with time windows on nodes and arcs. J. Mathe. Analy. Applicat. 186(3), 643\u2013648 (1994)","journal-title":"J. Mathe. Analy. Applicat."},{"key":"1670_CR37","doi-asserted-by":"crossref","unstructured":"Kim, S., Bin, K., Yang, D., Ha, S., Chong, S., Lee, K.: ENTRO: tackling the encoding and networking trade-off in offloaded video analytics. In: Proceedings of the 31st ACM International Conference on Multimedia; p. 9115\u20139123 (2023)","DOI":"10.1145\/3581783.3613785"},{"key":"1670_CR38","unstructured":"Zhu, Y., Li, X., Liu, C., Zolfaghari, M., Xiong, Y., Wu, C., et\u00a0al.: A comprehensive study of deep video action recognition. arXiv preprint arXiv:2012.06567. (2020)"},{"issue":"1","key":"1670_CR39","doi-asserted-by":"publisher","first-page":"59","DOI":"10.1080\/00031305.1988.10475524","volume":"42","author":"J Lee Rodgers","year":"1988","unstructured":"Lee Rodgers, J., Nicewander, W.A.: Thirteen ways to look at the correlation coefficient. The American Statistician. 42(1), 59\u201366 (1988)","journal-title":"The American Statistician."}],"container-title":["Journal of Real-Time Image Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11554-025-01670-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11554-025-01670-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11554-025-01670-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,3]],"date-time":"2025-05-03T06:23:25Z","timestamp":1746253405000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11554-025-01670-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4]]},"references-count":39,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2025,4]]}},"alternative-id":["1670"],"URL":"https:\/\/doi.org\/10.1007\/s11554-025-01670-6","relation":{},"ISSN":["1861-8200","1861-8219"],"issn-type":[{"value":"1861-8200","type":"print"},{"value":"1861-8219","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,4]]},"assertion":[{"value":"14 February 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 March 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 April 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"95"}}