{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,4]],"date-time":"2025-06-04T05:25:52Z","timestamp":1749014752551,"version":"3.37.3"},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2022,10,27]],"date-time":"2022-10-27T00:00:00Z","timestamp":1666828800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,10,27]],"date-time":"2022-10-27T00:00:00Z","timestamp":1666828800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100002491","name":"Hansung University","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100002491","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2023,4]]},"DOI":"10.1007\/s11227-022-04901-w","type":"journal-article","created":{"date-parts":[[2022,10,27]],"date-time":"2022-10-27T14:06:20Z","timestamp":1666879580000},"page":"5851-5877","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["gCFS: completely fair scheduling on multiple GPUs for improved multi-DNN execution in terms of performance isolation"],"prefix":"10.1007","volume":"79","author":[{"given":"Hojin","family":"Cho","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4254-4009","authenticated-orcid":false,"given":"Myungsun","family":"Kim","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,10,27]]},"reference":[{"issue":"12","key":"4901_CR1","doi-asserted-by":"publisher","first-page":"1347","DOI":"10.1109\/TCOM.1987.1096719","volume":"35","author":"M Karol","year":"1987","unstructured":"Karol M, Hluchyj M, Morgan S (1987) Input versus output queueing on a space-division packet switch. IEEE Trans Commun 35(12):1347\u20131356. https:\/\/doi.org\/10.1109\/TCOM.1987.1096719","journal-title":"IEEE Trans Commun"},{"key":"4901_CR2","unstructured":"Xiao W, Bhardwaj R, Ramjee R, Sivathanu M, Kwatra N, Han Z,Patel P, Peng X, Zhao H, Zhang Q, Yang F, Zhou L (2018) Gandiva: introspective cluster scheduling for deep learning. In: 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18), pp 595\u2013610"},{"key":"4901_CR3","doi-asserted-by":"publisher","unstructured":"Peng Y, Bao Y, Chen Y, Wu C, Guo C (2018) Optimus: an efficient dynamic resource scheduler for deep learning clusters. In: Proceedings of the 13th EuroSys Conference, pp 1\u201314. https:\/\/doi.org\/10.1145\/3190508.3190517","DOI":"10.1145\/3190508.3190517"},{"key":"4901_CR4","doi-asserted-by":"publisher","unstructured":"Chen Q, Yang H, Mars J, Tang L (2016) Baymax: QoS awareness and increased utilization for non-preemptive accelerators in warehouse scale computers. In: Proceedings of the 21st International Conference on Architectural Support for Programming Languages and Operating Systems, pp 681\u2013696. https:\/\/doi.org\/10.1145\/2872362.2872368","DOI":"10.1145\/2872362.2872368"},{"key":"4901_CR5","doi-asserted-by":"publisher","unstructured":"Chen Q, Yang H, Guo M, Kannan RS, Mars J, Tang L (2017) Prophet: precise QoS prediction on non-preemptive accelerators to improve utilization in warehouse-scale computers. In: Proceedings of the 22nd International Conference on Architectural Support for Programming Languages and Operating Systems, pp 17\u201332. https:\/\/doi.org\/10.1145\/3037697.3037700","DOI":"10.1145\/3037697.3037700"},{"key":"4901_CR6","doi-asserted-by":"publisher","unstructured":"Chaudhary S, Ramjee R, Sivathanu M, Kwatra N, Viswanatha S (2020) Balancing efficiency and fairness in heterogeneous GPU clusters for deep learning. In: Proceedings of the 15th European Conference on Computer Systems, pp 1\u201316. https:\/\/doi.org\/10.1145\/3342195.3387555","DOI":"10.1145\/3342195.3387555"},{"key":"4901_CR7","unstructured":"Mahajan K, Balasubramanian A, Singhvi A, Venkataraman S, Akella A, Phanishayee A, Chawla S (2020) Themis: fair and efficient GPU cluster scheduling. In: 17th USENIX Symposium on Networked Systems Design and Implementation (NSDI 20), pp 289\u2013304"},{"key":"4901_CR8","doi-asserted-by":"publisher","unstructured":"Le TN, Sun X, Chowdhury M, Liu Z (2020) AlloX: compute allocation in hybrid clusters. In: Proceedings of the 15th European Conference on Computer Systems, pp 1\u201316. https:\/\/doi.org\/10.1145\/3342195.3387547","DOI":"10.1145\/3342195.3387547"},{"issue":"6","key":"4901_CR9","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1007\/BF01940883","volume":"15","author":"SK Baruah","year":"1996","unstructured":"Baruah SK, Cohen NK, Plaxton CG, Varvel DA (1996) Proportionate progress: a notion of fairness in resource allocation. Algorithmica 15(6):600\u2013625. https:\/\/doi.org\/10.1007\/BF01940883","journal-title":"Algorithmica"},{"issue":"5","key":"4901_CR10","doi-asserted-by":"publisher","first-page":"198","DOI":"10.1145\/268998.266689","volume":"31","author":"MB Jones","year":"1997","unstructured":"Jones MB, Ro\u015fu D, Ro\u015fu M (1997) CPU reservations and time constraints: efficient, predictable scheduling of independent activities. SIGOPS Oper Syst Rev 31(5):198\u2013211. https:\/\/doi.org\/10.1145\/268998.266689","journal-title":"SIGOPS Oper Syst Rev"},{"key":"4901_CR11","doi-asserted-by":"publisher","first-page":"174","DOI":"10.1016\/j.jpdc.2017.08.012","volume":"111","author":"M Kim","year":"2018","unstructured":"Kim M, Noh S, Hyeon J, Hong S (2018) Fair-share scheduling in single-ISA asymmetric multicore architecture via scaled virtual runtime and load redistribution. J Parallel Distrib Comput 111:174\u2013186. https:\/\/doi.org\/10.1016\/j.jpdc.2017.08.012","journal-title":"J Parallel Distrib Comput"},{"key":"4901_CR12","doi-asserted-by":"publisher","first-page":"98874","DOI":"10.1109\/ACCESS.2020.2996596","volume":"8","author":"J Kim","year":"2020","unstructured":"Kim J, Shin P, Kim M, Hong S (2020) Memory-aware fair-share scheduling for improved performance isolation in the linux kernel. IEEE Access 8:98874\u201398886. https:\/\/doi.org\/10.1109\/ACCESS.2020.2996596","journal-title":"IEEE Access"},{"issue":"11","key":"4901_CR13","doi-asserted-by":"publisher","first-page":"1549","DOI":"10.1002\/spe.2285","volume":"45","author":"S Huh","year":"2015","unstructured":"Huh S, Yoo J, Hong S (2015) Cross-layer resource control and scheduling for improving interactivity in android. Softw Pract Exp 45(11):1549\u20131570. https:\/\/doi.org\/10.1002\/spe.2285","journal-title":"Softw Pract Exp"},{"key":"4901_CR14","doi-asserted-by":"publisher","unstructured":"Amert T, Otterness N, Yang M, Anderson JH, Smith FD (2017) GPU scheduling on the nvidia tx2: hidden details revealed. In: 2017 IEEE Real-Time Systems Symposium (RTSS), pp 104\u2013115. https:\/\/doi.org\/10.1109\/RTSS.2017.00017","DOI":"10.1109\/RTSS.2017.00017"},{"key":"4901_CR15","doi-asserted-by":"publisher","first-page":"85403","DOI":"10.1109\/ACCESS.2021.3088861","volume":"9","author":"C Lim","year":"2021","unstructured":"Lim C, Kim M (2021) ODMDEF: on-device multi-DNN execution framework utilizing adaptive layer-allocation on general purpose cores and accelerators. IEEE Access 9:85403\u201385417. https:\/\/doi.org\/10.1109\/ACCESS.2021.3088861","journal-title":"IEEE Access"},{"key":"4901_CR16","unstructured":"Rennich S (2012) Cuda c\/c++ streams and concurrency. https:\/\/developer.download.nvidia.com\/CUDA\/training\/StreamsAndConcurrencyWebinar.pdf. Accessed 11 April 2022"},{"key":"4901_CR17","unstructured":"Schroeder TC (2011) Peer-to-peer and unified virtual addressing. https:\/\/developer.download.nvidia.com\/CUDA\/training\/cuda_webinars_GPUDirect_uva.pdf. Accessed 11 Apr 2022"},{"key":"4901_CR18","unstructured":"NVIDIA (2012) Issue efficiency. https:\/\/docs.nvidia.com\/gameworks\/content\/developertools\/desktop\/analysis\/report\/cudaexperiments\/kernellevel\/issueefficiency.htm. Accessed 11 Apr 2022"},{"key":"4901_CR19","unstructured":"PyTorch. https:\/\/pytorch.org\/. Accessed 11 Apr 2022"},{"key":"4901_CR20","unstructured":"Johnson J (2022) Learning pytorch with examples. https:\/\/pytorch.org\/tutorials\/beginner\/pytorch_with_examples.html. Accessed 11 Oct 2022"},{"key":"4901_CR21","unstructured":"Ajitsaria A (2020) What is the python global interpreter lock (GIL)? https:\/\/realpython.com\/python-gil\/. Accessed 11 Apr 2022"},{"key":"4901_CR22","unstructured":"TorchScript. https:\/\/pytorch.org\/docs\/master\/jit.html. Accessed 11 Oct 2022"},{"issue":"7","key":"4901_CR23","doi-asserted-by":"publisher","first-page":"1135","DOI":"10.1007\/s00138-019-01042-8","volume":"30","author":"X Yu","year":"2019","unstructured":"Yu X, Zeng N, Liu S, Zhang Y (2019) Utilization of DenseNet201 for diagnosis of breast abnormality. Mach Vis Appl 30(7):1135\u20131144. https:\/\/doi.org\/10.1007\/s00138-019-01042-8","journal-title":"Mach Vis Appl"},{"key":"4901_CR24","doi-asserted-by":"publisher","unstructured":"Nguyen LD, Lin D, Lin Z, Cao J (2018) Deep CNNs for microscopic image classification by exploiting transfer learning and feature concatenation. In: 2018 IEEE International Symposium on Circuits and Systems (ISCAS), pp 1\u20135. https:\/\/doi.org\/10.1109\/ISCAS.2018.8351550","DOI":"10.1109\/ISCAS.2018.8351550"},{"key":"4901_CR25","doi-asserted-by":"crossref","unstructured":"Szegedy C, Vanhoucke V, Ioffe S, Shlens J, Wojna Z (2016) Rethinking the inception architecture for computer vision. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 2818\u20132826","DOI":"10.1109\/CVPR.2016.308"},{"key":"4901_CR26","unstructured":"Tan M, Le Q (2019) Efficientnet: rethinking model scaling for convolutional neural networks. In: Proceedings of the 36th International Conference on Machine Learning, pp 6105\u20136114"},{"key":"4901_CR27","unstructured":"NVIDIA Nsight Systems. https:\/\/developer.nvidia.com\/nsight-systems. Accessed 11 Apr 2022"},{"key":"4901_CR28","unstructured":"Narayanan D, Santhanam K, Kazhamiaka F, Phanishayee A, Zaharia M (2020) Heterogeneity-aware cluster scheduling policies for deep learning workloads. In: 14th USENIX Symposium on Operating Systems Design and implementation (OSDI 20), pp 481\u2013498"},{"key":"4901_CR29","unstructured":"Jeon M, Venkataraman S, Phanishayee A, Qian J, Xiao W, Yang F (2019) Analysis of large-scale multi-tenant GPU clusters for DNN training workloads. In: 2019 USENIX Annual Technical Conference (USENIX ATC 19), pp 947\u2013960"},{"key":"4901_CR30","unstructured":"Gu J, Chowdhury M, Shin KG, Zhu Y, Jeon M, Qian J, Liu H, Guo C (2019) Tiresias: a GPU cluster manager for distributed deep learning. In: 16th USENIX Symposium on Networked Systems Design and Implementation (NSDI 19), pp 485\u2013500"},{"issue":"1","key":"4901_CR31","doi-asserted-by":"publisher","first-page":"437","DOI":"10.1007\/s11134-009-9141-x","volume":"63","author":"S Aalto","year":"2009","unstructured":"Aalto S, Ayesta U, Righter R (2009) On the Gittins index in the M\/G\/1 queue. Queueing Syst 63(1):437\u2013458. https:\/\/doi.org\/10.1007\/s11134-009-9141-x","journal-title":"Queueing Syst"},{"key":"4901_CR32","doi-asserted-by":"publisher","DOI":"10.1002\/9780470980033","volume-title":"Multi-armed bandit allocation indices","author":"J Gittins","year":"2011","unstructured":"Gittins J, Glazebrook K, Weber R (2011) Multi-armed bandit allocation indices. Wiley, Hoboken"},{"issue":"3","key":"4901_CR33","doi-asserted-by":"publisher","first-page":"286","DOI":"10.1016\/j.peva.2007.06.028","volume":"65","author":"M Nuyens","year":"2008","unstructured":"Nuyens M, Wierman A (2008) The foreground\u2013background queue: a survey. Perform Eval 65(3):286\u2013307. https:\/\/doi.org\/10.1016\/j.peva.2007.06.028","journal-title":"Perform Eval"},{"issue":"4","key":"4901_CR34","doi-asserted-by":"publisher","first-page":"393","DOI":"10.1145\/2785956.2787480","volume":"45","author":"M Chowdhury","year":"2015","unstructured":"Chowdhury M, Stoica I (2015) Efficient coflow scheduling without prior knowledge. SIGCOMM Comput Commun Rev 45(4):393\u2013406. https:\/\/doi.org\/10.1145\/2785956.2787480","journal-title":"SIGCOMM Comput Commun Rev"},{"key":"4901_CR35","doi-asserted-by":"publisher","unstructured":"Corbat\u00f3 FJ, Merwin-Daggett M, Daley RC (1962) An experimental time-sharing system. In: Spring Joint Computer Conference, pp 335\u2013344. https:\/\/doi.org\/10.1145\/1460833.1460871","DOI":"10.1145\/1460833.1460871"},{"key":"4901_CR36","unstructured":"Zhao H,Han Z, Yang Z, Zhang Q, Yang F,Zhou L, Yang M, Lau FCM, Wang Y, Xiong Y, Wang B (2020) HiveD: sharing a GPU cluster for deep learning with guarantees. In: 14th USENIX Symposium on Operating Systems Design and Implementation (OSDI 20), pp 515\u2013532"},{"key":"4901_CR37","unstructured":"Jain P, Mo X, Jain A, Subbaraj H, Durrani RS, Tumanov A, Gonzalez J, Stoica I (2018) Dynamic space\u2013time scheduling for GPU inference. arXiv preprint arXiv:http:\/\/arxiv.org\/abs\/1901.00041"},{"key":"4901_CR38","doi-asserted-by":"publisher","unstructured":"Xiang Y, Kim H (2019) Pipelined data-parallel CPU\/GPU scheduling for multi-DNN real-time inference. In: 2019 IEEE Real-Time Systems Symposium (RTSS), pp 392\u2013405. https:\/\/doi.org\/10.1109\/RTSS46320.2019.00042","DOI":"10.1109\/RTSS46320.2019.00042"},{"key":"4901_CR39","doi-asserted-by":"publisher","unstructured":"Goswami A, Young J, Schwan K, Farooqui N, Gavrilovska A, Wolf M, Eisenhauer G (2016) GPUShare: fair-sharing middleware for GPU clouds. In: 2016 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW), pp 1796\u20131776. https:\/\/doi.org\/10.1109\/IPDPSW.2016.94","DOI":"10.1109\/IPDPSW.2016.94"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-022-04901-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-022-04901-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-022-04901-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,3,2]],"date-time":"2023-03-02T19:09:55Z","timestamp":1677784195000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-022-04901-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,27]]},"references-count":39,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2023,4]]}},"alternative-id":["4901"],"URL":"https:\/\/doi.org\/10.1007\/s11227-022-04901-w","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"type":"print","value":"0920-8542"},{"type":"electronic","value":"1573-0484"}],"subject":[],"published":{"date-parts":[[2022,10,27]]},"assertion":[{"value":"16 October 2022","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 October 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}