{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,7]],"date-time":"2026-02-07T09:14:57Z","timestamp":1770455697324,"version":"3.49.0"},"publisher-location":"Cham","reference-count":32,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030106317","type":"print"},{"value":"9783030106324","type":"electronic"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-10632-4_5","type":"book-chapter","created":{"date-parts":[[2019,1,12]],"date-time":"2019-01-12T04:33:04Z","timestamp":1547267584000},"page":"83-106","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Adaptive Simultaneous Multi-tenancy for GPUs"],"prefix":"10.1007","author":[{"given":"Ramin","family":"Bashizade","sequence":"first","affiliation":[]},{"given":"Yuxuan","family":"Li","sequence":"additional","affiliation":[]},{"given":"Alvin R.","family":"Lebeck","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,1,13]]},"reference":[{"key":"5_CR1","doi-asserted-by":"crossref","unstructured":"Adriaens, J.T., Compton, K., Kim, N.S., Schulte, M.J.: The case for GPGPU spatial multitasking. In: Proceedings of the 2012 IEEE 18th International Symposium on High-Performance Computer Architecture. HPCA 2012, pp. 1\u201312. IEEE Computer Society, Washington, DC (2012). http:\/\/dx.doi.org\/10.1109\/HPCA.2012.6168946","DOI":"10.1109\/HPCA.2012.6168946"},{"key":"5_CR2","unstructured":"Amazon Web Services: Elastic GPUS (2017). https:\/\/aws.amazon.com\/ec2\/Elastic-GPUs\/"},{"key":"5_CR3","doi-asserted-by":"crossref","unstructured":"Basaran, C., Kang, K.D.: Supporting preemptive task executions and memory copies in GPGPUS. In: Proceedings of the 2012 24th Euromicro Conference on Real-Time Systems. ECRTS 2012, pp. 287\u2013296. IEEE Computer Society, Washington, DC (2012). http:\/\/dx.doi.org\/10.1109\/ECRTS.2012.15","DOI":"10.1109\/ECRTS.2012.15"},{"key":"5_CR4","doi-asserted-by":"crossref","unstructured":"Chase, J.S., Anderson, D.C., Thakar, P.N., Vahdat, A.M., Doyle, R.P.: Managing energy and server resources in hosting centers. In: Proceedings of the Eighteenth ACM Symposium on Operating Systems Principles. SOSP 2001, pp. 103\u2013116. ACM, New York (2001). http:\/\/doi.acm.org\/10.1145\/502034.502045","DOI":"10.1145\/502034.502045"},{"key":"5_CR5","doi-asserted-by":"crossref","unstructured":"Che, S., Sheaffer, J.W., Boyer, M., Szafaryn, L.G., Wang, L., Skadron, K.: A characterization of the Rodinia benchmark suite with comparison to contemporary cmp workloads. In: Proceedings of the IEEE International Symposium on Workload Characterization (IISWC 2010), pp. 1\u201311. IISWC 2010. IEEE Computer Society, Washington, DC (2010). http:\/\/dx.doi.org\/10.1109\/IISWC.2010.5650274","DOI":"10.1109\/IISWC.2010.5650274"},{"key":"5_CR6","unstructured":"Chen, G., Zhao, Y., Shen, X., Zhou, H.: Effisha: a software framework for enabling effficient preemptive scheduling of GPU. In: Proceedings of the 22Nd ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, pp. 3\u201316. PPoPP 2017, ACM, New York (2017). http:\/\/doi.acm.org\/10.1145\/3018743.3018748"},{"key":"5_CR7","unstructured":"Danalis, A., et al.: The scalable heterogeneous computing (shoc) benchmark suite. In: Proceedings of the 3rd Workshop on General-Purpose Computation on Graphics Processing Units, pp. 63\u201374. GPGPU-3, ACM, New York (2010). http:\/\/doi.acm.org\/10.1145\/1735688.1735702"},{"issue":"3","key":"5_CR8","doi-asserted-by":"publisher","first-page":"42","DOI":"10.1109\/MM.2008.44","volume":"28","author":"S Eyerman","year":"2008","unstructured":"Eyerman, S., Eeckhout, L.: System-level performance metrics for multiprogram workloads. IEEE Micro 28(3), 42\u201353 (2008)","journal-title":"IEEE Micro"},{"key":"5_CR9","unstructured":"Google: Google cloud platforms (2017). https:\/\/cloud.google.com\/gpu\/"},{"key":"5_CR10","unstructured":"Gregg, C., Dorn, J., Hazelwood, K., Skadron, K.: Fine-grained Resource Sharing for Concurrent GPGPU Kernels. In: Proceedings of the 4th USENIX Conference on Hot Topics in Parallelism. HotPar 2012, p. 10. USENIX Association, Berkeley, (2012). http:\/\/dl.acm.org\/citation.cfm?id=2342788.2342798"},{"key":"5_CR11","doi-asserted-by":"crossref","unstructured":"Gupta, K., Stuart, J.A., Owens, J.D.: A study of persistent threads style GPU programming for GPGPU workloads. In: 2012 Innovative Parallel Computing (InPar), pp. 1\u201314, May 2012","DOI":"10.1109\/InPar.2012.6339596"},{"key":"5_CR12","doi-asserted-by":"crossref","unstructured":"Jiao, Q., Lu, M., Huynh, H.P., Mitra, T.: Improving GPGPU energy-efficiency through concurrent kernel execution and DVFs. In: Proceedings of the 13th Annual IEEE\/ACM International Symposium on Code Generation and Optimization. CGO 2015, pp. 1\u201311. IEEE Computer Society, Washington, DC (2015). http:\/\/dl.acm.org\/citation.cfm?id=2738600.2738602","DOI":"10.1109\/CGO.2015.7054182"},{"key":"5_CR13","unstructured":"Jones, S.: Introduction to dynamic parallelism. In: Nvidia GPU Technology Conference. NVIDIA (2012). http:\/\/developer.download.nvidia.com\/GTC\/PDF\/GTC2012\/PresentationPDF\/S0338-GTC2012-CUDA-Programming-Model.pdf"},{"issue":"3","key":"5_CR14","doi-asserted-by":"publisher","first-page":"748","DOI":"10.1109\/TPDS.2014.2313342","volume":"26","author":"Y Liang","year":"2015","unstructured":"Liang, Y., Huynh, H.P., Rupnow, K., Goh, R.S.M., Chen, D.: Efficient gpu spatial-temporal multitasking. IEEE Trans. Parall. Distrib. Syst. 26(3), 748\u2013760 (2015)","journal-title":"IEEE Trans. Parall. Distrib. Syst."},{"key":"5_CR15","unstructured":"Microsoft: Microsoft azure (2016). https:\/\/azure.microsoft.com\/en-us\/blog\/azure-n-series-general-availability-on-december-1\/"},{"key":"5_CR16","unstructured":"Nvidia: CUDA programming guide (2008). https:\/\/docs.nvidia.com\/cuda\/cuda-c-programming-guide\/"},{"key":"5_CR17","unstructured":"Nvidia: Next generation CUDA computer architecture Kepler GK110 (2012)"},{"key":"5_CR18","unstructured":"NVIDIA: Multi-process service (2015). https:\/\/docs.nvidia.com\/deploy\/pdf\/CUDA_Multi_Process_Service_Overview.pdf"},{"key":"5_CR19","unstructured":"NVIDIA: Pascal architecture whitepaper, June 2015. http:\/\/www.nvidia.com\/object\/pascal-architecture-whitepaper.html"},{"key":"5_CR20","unstructured":"NVIDIA: Volta architecture whitepaper, June 2015. http:\/\/www.nvidia.com\/object\/volta-architecture-whitepaper.html"},{"key":"5_CR21","unstructured":"Pai, S., Thazhuthaveetil, M.J., Govindarajan, R.: Improving GPGPU concurrency with elastic kernels. In: Proceedings of the Eighteenth International Conference on Architectural Support for Programming Languages and Operating Systems, pp. 407\u2013418. ASPLOS 2013, ACM, New York (2013). http:\/\/doi.acm.org\/10.1145\/2451116.2451160"},{"key":"5_CR22","doi-asserted-by":"crossref","unstructured":"Park, J.J.K., Park, Y., Mahlke, S.: Chimera: collaborative preemption for multitasking on a shared GPU. In: Proceedings of the Twentieth International Conference on Architectural Support for Programming Languages and Operating Systems. ASPLOS 2015, pp. 593\u2013606. ACM, New York (2015). http:\/\/doi.acm.org\/10.1145\/2694344.2694346","DOI":"10.1145\/2694344.2694346"},{"key":"5_CR23","doi-asserted-by":"crossref","unstructured":"Park, J.J.K., Park, Y., Mahlke, S.: Dynamic resource management for efficient utilization of multitasking GPUs. In: Proceedings of the Twenty-Second International Conference on Architectural Support for Programming Languages and Operating Systems. ASPLOS 2017, pp. 527\u2013540. ACM, New York (2017). http:\/\/doi.acm.org\/10.1145\/3037697.3037707","DOI":"10.1145\/3037697.3037707"},{"key":"5_CR24","doi-asserted-by":"crossref","unstructured":"Randles, M., Lamb, D., Taleb-Bendiab, A.: A comparative study into distributed load balancing algorithms for cloud computing. In: 2010 IEEE 24th International Conference on Advanced Information Networking and Applications Workshops, pp. 551\u2013556, April 2010","DOI":"10.1109\/WAINA.2010.85"},{"key":"5_CR25","doi-asserted-by":"crossref","unstructured":"Shahar, S., Bergman, S., Silberstein, M.: Activepointers: a case for software address translation on GPUs. In: Proceedings of the 43rd International Symposium on Computer Architecture. ISCA 2016, pp. 596\u2013608. IEEE Press, Piscataway (2016). https:\/\/doi.org\/10.1109\/ISCA.2016.58","DOI":"10.1145\/3007787.3001200"},{"key":"5_CR26","unstructured":"Stratton, J.A., et al.: Parboil: a revised benchmark suite for scientific and commercial throughput computing. Technical report (2012). https:\/\/scholar.google.com\/scholar?oi=bibs&hl=en&cluster=14097255143770688510"},{"key":"5_CR27","unstructured":"Tanasic, I., Gelado, I., Cabezas, J., Ramirez, A., Navarro, N., Valero, M.: Enabling preemptive multiprogramming on GPUs. In: Proceeding of the 41st Annual International Symposium on Computer Architecuture, pp. 193\u2013204. ISCA 2014, IEEE Press, Piscataway (2014). http:\/\/dl.acm.org\/citation.cfm?id=2665671.2665702"},{"key":"5_CR28","doi-asserted-by":"crossref","unstructured":"Wang, Z., Yang, J., Melhem, R., Childers, B., Zhang, Y., Guo, M.: Simultaneous multikernel GPU: Multi-tasking throughput processors via fine-grained sharing. In: 2016 IEEE International Symposium on High Performance Computer Architecture (HPCA), pp. 358\u2013369, March 2016","DOI":"10.1109\/HPCA.2016.7446078"},{"key":"5_CR29","doi-asserted-by":"crossref","unstructured":"Wu, B., Chen, G., Li, D., Shen, X., Vetter, J.: Enabling and exploiting flexible task assignment on GPU through SM-centric program transformations. In: Proceedings of the 29th ACM on International Conference on Supercomputing. ICS 2015, pp. 119\u2013130. ACM, New York (2015). http:\/\/doi.acm.org\/10.1145\/2751205.2751213","DOI":"10.1145\/2751205.2751213"},{"key":"5_CR30","unstructured":"Wu, B., Liu, X., Zhou, X., Jiang, C.: Flep: enabling flexible and efficient preemption on GPUs. In: Proceedings of the Twenty-Second International Conference on Architectural Support for Programming Languages and Operating Systems, pp. 483\u2013496. ASPLOS 2017, ACM, New York (2017). http:\/\/doi.acm.org\/10.1145\/3037697.3037742"},{"key":"5_CR31","doi-asserted-by":"crossref","unstructured":"Xu, Q., Jeon, H., Kim, K., Ro, W.W., Annavaram, M.: Warped-slicer: Efficient intra-SM slicing through dynamic resource partitioning for GPU multiprogramming. In: 2016 ACM\/IEEE 43rd Annual International Symposium on Computer Architecture (ISCA), pp. 230\u2013242, June 2016","DOI":"10.1109\/ISCA.2016.29"},{"issue":"6","key":"5_CR32","doi-asserted-by":"publisher","first-page":"1522","DOI":"10.1109\/TPDS.2013.257","volume":"25","author":"J Zhong","year":"2014","unstructured":"Zhong, J., He, B.: Kernelet: high-throughput gpu kernel executions with dynamic slicing and scheduling. IEEE Trans. Parallel Distrib. Syst. 25(6), 1522\u20131532 (2014). https:\/\/doi.org\/10.1109\/TPDS.2013.257","journal-title":"IEEE Trans. Parallel Distrib. Syst."}],"container-title":["Lecture Notes in Computer Science","Job Scheduling Strategies for Parallel Processing"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-10632-4_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,11,14]],"date-time":"2019-11-14T02:31:35Z","timestamp":1573698695000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-10632-4_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030106317","9783030106324"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-10632-4_5","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"13 January 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"JSSPP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Workshop on Job Scheduling Strategies for Parallel Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vancouver, BC","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Canada","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 May 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 May 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"jsspp2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/jsspp.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"12","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"7","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"58% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"4","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information"}}]}}