{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,14]],"date-time":"2025-11-14T07:38:54Z","timestamp":1763105934007,"version":"3.37.3"},"reference-count":28,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2023,5,24]],"date-time":"2023-05-24T00:00:00Z","timestamp":1684886400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,5,24]],"date-time":"2023-05-24T00:00:00Z","timestamp":1684886400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100004739","name":"Youth Innovation Promotion Association of the Chinese Academy of Sciences","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004739","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program","doi-asserted-by":"crossref","award":["2022YFB4501404"],"award-info":[{"award-number":["2022YFB4501404"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Beijing Natural Science Foundation","award":["4232036"],"award-info":[{"award-number":["4232036"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["CCF Trans. HPC"],"published-print":{"date-parts":[[2023,9]]},"DOI":"10.1007\/s42514-023-00154-y","type":"journal-article","created":{"date-parts":[[2023,5,24]],"date-time":"2023-05-24T08:03:49Z","timestamp":1684915429000},"page":"304-321","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["ArkGPU: enabling applications\u2019 high-goodput co-location execution on multitasking GPUs"],"prefix":"10.1007","volume":"5","author":[{"given":"Jie","family":"Lou","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yiming","family":"Sun","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jie","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1176-2521","authenticated-orcid":false,"given":"Huawei","family":"Cao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuan","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ninghui","family":"Sun","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,5,24]]},"reference":[{"issue":"5","key":"154_CR1","doi-asserted-by":"publisher","first-page":"50","DOI":"10.1145\/2890784","volume":"59","author":"B Burns","year":"2016","unstructured":"Burns, B., Grant, B., Oppenheimer, D., Brewer, E., Wilkes, J.: Borg, omega, and kubernetes. Commun. ACM 59(5), 50\u201357 (2016)","journal-title":"Commun. ACM"},{"issue":"4","key":"154_CR3","doi-asserted-by":"publisher","first-page":"681","DOI":"10.1145\/2954679.2872368","volume":"51","author":"Q Chen","year":"2016","unstructured":"Chen, Q., Yang, H., Mars, J., Tang, L.: Baymax: Qos awareness and increased utilization for non-preemptive accelerators in warehouse scale computers. ACM SIGPLAN Notices 51(4), 681\u2013696 (2016)","journal-title":"ACM SIGPLAN Notices"},{"key":"154_CR2","doi-asserted-by":"crossref","unstructured":"Chen, S., Delimitrou, C., Mart\u00ednez, J.F.: Parties: Qos-aware resource partitioning for multiple interactive services. In: Proceedings of the Twenty-Fourth International Conference on Architectural Support for Programming Languages and Operating Systems, pp. 107\u2013120 (2019)","DOI":"10.1145\/3297858.3304005"},{"key":"154_CR4","unstructured":"cuBLAS. https:\/\/docs.nvidia.com\/deploy\/pdf\/CUDA_Multi_Process_Service_Overview.pdf. Accessed 25 Dec 2022"},{"key":"154_CR5","doi-asserted-by":"crossref","unstructured":"Duato, J., Igual, F.D., Mayo, R., Pena, A.J., Quintana-Ort\u00ed, E.S., Silla, F.: An efficient implementation of gpu virtualization in high performance clusters. In: European Conference on Parallel Processing, pp. 385\u2013394 (2009). Springer","DOI":"10.1007\/978-3-642-14122-5_44"},{"issue":"14\u201315","key":"154_CR6","doi-asserted-by":"publisher","first-page":"2627","DOI":"10.1016\/S1352-2310(97)00447-0","volume":"32","author":"MW Gardner","year":"1998","unstructured":"Gardner, M.W., Dorling, S.: Artificial neural networks (the multilayer perceptron)-a review of applications in the atmospheric sciences. Atmos. Environ. 32(14\u201315), 2627\u20132636 (1998)","journal-title":"Atmos. Environ."},{"key":"154_CR7","doi-asserted-by":"crossref","unstructured":"Gu, J., Song, S., Li, Y., Luo, H.: Gaiagpu: sharing gpus in container clouds. In: 2018 IEEE Intl Conf on Parallel & Distributed Processing with Applications, Ubiquitous Computing & Communications, Big Data & Cloud Computing, Social Computing & Networking, Sustainable Computing & Communications (ISPA\/IUCC\/BDCloud\/SocialCom\/SustainCom), pp. 469\u2013476 (2018). IEEE","DOI":"10.1109\/BDCloud.2018.00077"},{"key":"154_CR8","doi-asserted-by":"crossref","unstructured":"Hafeez, U.U., Gandhi, A.: Empirical analysis and modeling of compute times of cnn operations on aws cloud. In: 2020 IEEE International Symposium on Workload Characterization (IISWC), pp. 181\u2013192 (2020). IEEE","DOI":"10.1109\/IISWC50251.2020.00026"},{"key":"154_CR9","doi-asserted-by":"crossref","unstructured":"Li, J., Xu, H., Zhu, Y., Liu, Z., Guo, C., Wang, C.: Aryl: an Elastic Cluster Scheduler for Deep Learning. arXiv (2022). https:\/\/arxiv.org\/abs\/2202.07896","DOI":"10.1145\/3552326.3587445"},{"key":"154_CR10","doi-asserted-by":"crossref","unstructured":"Mars, J., Tang, L., Hundt, R., Skadron, K., Soffa, M.L.: Bubble-up: Increasing utilization in modern warehouse scale computers via sensible co-locations. In: Proceedings of the 44th Annual IEEE\/ACM International Symposium on Microarchitecture, pp. 248\u2013259 (2011)","DOI":"10.1145\/2155620.2155650"},{"key":"154_CR11","unstructured":"Multi-Process Service. https:\/\/docs.nvidia.com\/deploy\/pdf\/CUDA_Multi_Process_Service_Overview.pdf. Accessed 25 Dec 2022"},{"issue":"6","key":"154_CR12","doi-asserted-by":"publisher","first-page":"275","DOI":"10.1002\/cem.873","volume":"18","author":"AJ Myles","year":"2004","unstructured":"Myles, A.J., Feudale, R.N., Liu, Y., Woody, N.A., Brown, S.D.: An introduction to decision tree modeling. J. Chemometrics 18(6), 275\u2013285 (2004)","journal-title":"J. Chemometrics"},{"key":"154_CR13","unstructured":"NVIDIA MIG. https:\/\/www.nvidia.cn\/technologies\/multi-instance-gpu\/. Accessed 25 Dec 2022"},{"key":"154_CR14","unstructured":"Nvml-api. https:\/\/docs.nvidia.com\/deploy\/nvml-api\/index.html. Accessed 25 Dec 2022"},{"key":"154_CR15","unstructured":"OpenAI. https:\/\/openai.com\/. Accessed 25 Dec 2022"},{"key":"154_CR16","doi-asserted-by":"crossref","unstructured":"Patel, T., Tiwari, D.: Clite: Efficient and qos-aware co-location of multiple latency-critical jobs for warehouse scale computers. In: 2020 IEEE International Symposium on High Performance Computer Architecture (HPCA), pp. 193\u2013206 (2020). IEEE","DOI":"10.1109\/HPCA47549.2020.00025"},{"key":"154_CR17","doi-asserted-by":"crossref","unstructured":"Reddi, V.J., Cheng, C., Kanter, D., Mattson, P., Schmuelling, G., Wu, C.-J., Anderson, B., Breughe, M., Charlebois, M., Chou, W., et\u00a0al.: Mlperf inference benchmark. In: 2020 ACM\/IEEE 47th Annual International Symposium on Computer Architecture (ISCA), pp. 446\u2013459 (2020). IEEE","DOI":"10.1109\/ISCA45697.2020.00045"},{"key":"154_CR18","volume-title":"Linear regression analysis","author":"GA Seber","year":"2012","unstructured":"Seber, G.A., Lee, A.J.: Linear regression analysis. Wiley, Hoboken (2012)"},{"key":"154_CR19","doi-asserted-by":"crossref","unstructured":"Shen, H., Chen, L., Jin, Y., Zhao, L., Kong, B., Philipose, M., Krishnamurthy, A., Sundaram, R.: Nexus: A gpu cluster engine for accelerating dnn-based video analysis. In: Proceedings of the 27th ACM Symposium on Operating Systems Principles, pp. 322\u2013337 (2019)","DOI":"10.1145\/3341301.3359658"},{"key":"154_CR20","doi-asserted-by":"crossref","unstructured":"Thinakaran, P., Gunasekaran, J.R., Sharma, B., Kandemir, M.T., Das, C.R.: Kube-knots: Resource harvesting through dynamic container orchestration in gpu-based datacenters. In: 2019 IEEE International Conference on Cluster Computing (CLUSTER), pp. 1\u201313 (2019). 10.1109\/CLUSTER.2019.8891040","DOI":"10.1109\/CLUSTER.2019.8891040"},{"key":"154_CR21","unstructured":"Xiao, W., Bhardwaj, R., Ramjee, R., Sivathanu, M., Kwatra, N., Han, Z., Patel, P., Peng, X., Zhao, H., Zhang, Q., et\u00a0al.: Gandiva: Introspective cluster scheduling for deep learning. In: 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18), pp. 595\u2013610 (2018)"},{"key":"154_CR22","unstructured":"Xu\u00a0Z\u00a0W, L.G.J., H, S.N.: Superbahn: Towards new type of cyberinfrastructure. Bull. Chin. Acad. Sci. 37(1), 46\u201352 (2022)"},{"issue":"3","key":"154_CR23","doi-asserted-by":"publisher","first-page":"607","DOI":"10.1145\/2508148.2485974","volume":"41","author":"H Yang","year":"2013","unstructured":"Yang, H., Breslow, A., Mars, J., Tang, L.: Bubble-flux: Precise online qos management for increased utilization in warehouse scale computers. ACM SIGARCH Comput. Architecture News 41(3), 607\u2013618 (2013)","journal-title":"ACM SIGARCH Comput. Architecture News"},{"key":"154_CR24","doi-asserted-by":"crossref","unstructured":"Yeh, T.-A., Chen, H.-H., Chou, J.: Kubeshare: A framework to manage gpus as first-class and shared resources in container cloud. In: Proceedings of the 29th International Symposium on High-Performance Parallel and Distributed Computing, pp. 173\u2013184 (2020)","DOI":"10.1145\/3369583.3392679"},{"key":"154_CR26","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Laurenzano, M.A., Mars, J., Tang, L.: Smite: Precise qos prediction on real-system smt processors to improve utilization in warehouse scale computers. In: 2014 47th Annual IEEE\/ACM International Symposium on Microarchitecture, pp. 406\u2013418 (2014). IEEE","DOI":"10.1109\/MICRO.2014.53"},{"key":"154_CR25","doi-asserted-by":"crossref","unstructured":"Zhang, W., Chen, Q., Zheng, N., Cui, W., Fu, K., Guo, M.: Towards qos-awareness and improved utilization of spatial multitasking gpus. IEEE Trans. Comput. 71(4), 866\u2013879 (2022)","DOI":"10.1109\/TC.2021.3064352"},{"key":"154_CR27","doi-asserted-by":"crossref","unstructured":"Zhao, W., Chen, Q., Lin, H., Zhang, J., Leng, J., Li, C., Zheng, W., Li, L., Guo, M.: Themis: Predicting and reining in application-level slowdown on spatial multitasking gpus. In: 2019 IEEE International Parallel and Distributed Processing Symposium (IPDPS), pp. 653\u2013663 (2019). IEEE","DOI":"10.1109\/IPDPS.2019.00074"},{"key":"154_CR28","doi-asserted-by":"crossref","unstructured":"Zhu, H., Erez, M.: Dirigent: Enforcing qos for latency-critical tasks on shared multicore systems. In: Proceedings of the Twenty-first International Conference on Architectural Support for Programming Languages and Operating Systems, pp. 33\u201347 (2016)","DOI":"10.1145\/2954680.2872394"}],"container-title":["CCF Transactions on High Performance Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42514-023-00154-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s42514-023-00154-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42514-023-00154-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,12,4]],"date-time":"2023-12-04T08:06:37Z","timestamp":1701677197000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s42514-023-00154-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,24]]},"references-count":28,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2023,9]]}},"alternative-id":["154"],"URL":"https:\/\/doi.org\/10.1007\/s42514-023-00154-y","relation":{},"ISSN":["2524-4922","2524-4930"],"issn-type":[{"type":"print","value":"2524-4922"},{"type":"electronic","value":"2524-4930"}],"subject":[],"published":{"date-parts":[[2023,5,24]]},"assertion":[{"value":"11 March 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 May 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 May 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"No potential conflict of interest was reported by the authors","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}