{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,9]],"date-time":"2026-04-09T14:44:59Z","timestamp":1775745899602,"version":"3.50.1"},"reference-count":43,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2025,2,1]],"date-time":"2025-02-01T00:00:00Z","timestamp":1738368000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,2,1]],"date-time":"2025-02-01T00:00:00Z","timestamp":1738368000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,2,1]],"date-time":"2025-02-01T00:00:00Z","timestamp":1738368000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62350710797"],"award-info":[{"award-number":["62350710797"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Science and Technology Plan Project of Shenzhen","award":["G20220831110002004"],"award-info":[{"award-number":["G20220831110002004"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Parallel Distrib. Syst."],"published-print":{"date-parts":[[2025,2]]},"DOI":"10.1109\/tpds.2024.3511543","type":"journal-article","created":{"date-parts":[[2024,12,5]],"date-time":"2024-12-05T19:19:00Z","timestamp":1733426340000},"page":"266-281","source":"Crossref","is-referenced-by-count":3,"title":["TOP: Task-Based Operator Parallelism for Asynchronous Deep Learning Inference on GPU"],"prefix":"10.1109","volume":"36","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6805-2649","authenticated-orcid":false,"given":"Changyao","family":"Lin","sequence":"first","affiliation":[{"name":"School of Computer Science and Technology, Harbin Institute of Technology, Harbin, Heilongjiang, China"}]},{"given":"Zhenming","family":"Chen","sequence":"additional","affiliation":[{"name":"China Construction Steel Structure Engineering Corp., LTD, Shenzhen, Guangdong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2539-8257","authenticated-orcid":false,"given":"Ziyang","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Harbin Institute of Technology, Harbin, Heilongjiang, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6209-6886","authenticated-orcid":false,"given":"Jie","family":"Liu","sequence":"additional","affiliation":[{"name":"National Key Laboratory of Smart Farm Technologies and Systems, Harbin Institute of Technology, Harbin, Heilongjiang, China"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Cuda programming guide","year":"2020"},{"key":"ref2","article-title":"Cuda multi-streams","year":"2015"},{"key":"ref3","article-title":"Multi-process service","year":"2020"},{"key":"ref4","article-title":"Multi-instance GPU","year":"2020"},{"key":"ref5","article-title":"Multi-model machine learning inference serving with GPU spatial partitioning","author":"Choi","year":"2021"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/3485730.3492886"},{"key":"ref7","first-page":"15","article-title":"Deep learning inference service at Microsoft","volume-title":"Proc. 2019 USENIX Conf. Oper. Mach. Learn.","author":"Soifer"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3560905.3568520"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359630"},{"key":"ref10","first-page":"167","article-title":"IOS: Inter-operator scheduler for CNN acceleration","volume-title":"Proc. Mach. Learn. Syst.","volume":"3","author":"Ding"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3583120.3586953"},{"key":"ref12","article-title":"A survey of multi-tenant deep learning inference on GPU","author":"Yu","year":"2022"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.5555\/3291168.3291211"},{"key":"ref14","article-title":"Efficient inference with TensorRT","volume-title":"Proc. GPU Technol. Conf.","author":"Vanholder"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICCAD51958.2021.9643501"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/SECON55815.2022.9918563"},{"key":"ref17","first-page":"6379","article-title":"Multi-agent actor-critic for mixed cooperative-competitive environments","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Lowe"},{"key":"ref18","first-page":"265","article-title":"TensorFlow: Large-scale machine learning on heterogeneous systems","volume-title":"Proc. 12th USENIX Conf. Operating Syst. Des. Implementation","author":"Abadi"},{"key":"ref19","article-title":"PyTorch: An imperative style, high-performance deep learning library","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Paszke"},{"key":"ref20","article-title":"Horizontally fused training array: An effective hardware utilization squeezer for training novel deep learning models","author":"Wang","year":"2021"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/RTSS55097.2022.00032"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref23","article-title":"Geforce RTX3080 family","year":"2024"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.308"},{"key":"ref25","article-title":"SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and $< $<0.5MB model size","author":"Iandola","year":"2016"},{"key":"ref26","article-title":"Prioritized experience replay","author":"Schaul","year":"2015"},{"key":"ref27","first-page":"387","article-title":"Deterministic policy gradient algorithms","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Silver"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2020.2981333"},{"key":"ref29","article-title":"Learning from delayed rewards","author":"Watkins","year":"1989"},{"key":"ref30","article-title":"cuDNN: Efficient primitives for deep learning","author":"Chetlur","year":"2014"},{"key":"ref31","article-title":"NVIDIA Jetson Xavier","year":"2022"},{"key":"ref32","article-title":"NVIDIA A100","year":"2024"},{"key":"ref33","article-title":"NVIDIA Jetson nano","year":"2022"},{"key":"ref34","article-title":"NVIDIA Jetson TX2","year":"2022"},{"key":"ref35","article-title":"NVIDIA cuDNN documentation","year":"2020"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref37","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854370"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/3625687.3625789"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.298"},{"key":"ref41","article-title":"AI on the edge: Rethinking AI-based IoT applications using specialized edge architectures","author":"Liang","year":"2020"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1145\/3372224.3419194"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2016.2579198"}],"container-title":["IEEE Transactions on Parallel and Distributed Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/71\/10795769\/10778584.pdf?arnumber=10778584","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,28]],"date-time":"2024-12-28T06:02:23Z","timestamp":1735365743000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10778584\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2]]},"references-count":43,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/tpds.2024.3511543","relation":{},"ISSN":["1045-9219","1558-2183","2161-9883"],"issn-type":[{"value":"1045-9219","type":"print"},{"value":"1558-2183","type":"electronic"},{"value":"2161-9883","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,2]]}}}