{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T17:11:30Z","timestamp":1743009090723,"version":"3.40.3"},"publisher-location":"Cham","reference-count":23,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783031125966"},{"type":"electronic","value":"9783031125973"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-12597-3_23","type":"book-chapter","created":{"date-parts":[[2022,7,31]],"date-time":"2022-07-31T10:02:21Z","timestamp":1659261741000},"page":"369-384","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A Hybrid Piece-Wise Slowdown Model for\u00a0Concurrent Kernel Execution on\u00a0GPU"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6153-7651","authenticated-orcid":false,"given":"Bernab\u00e9","family":"L\u00f3pez-Albelda","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7340-4976","authenticated-orcid":false,"given":"Francisco M.","family":"Castro","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0545-5958","authenticated-orcid":false,"given":"Jose M.","family":"Gonz\u00e1lez-Linares","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3431-6516","authenticated-orcid":false,"given":"Nicol\u00e1s","family":"Guil","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,8,1]]},"reference":[{"doi-asserted-by":"publisher","unstructured":"Adriaens, J.T., Compton, K., Kim, N.S., Schulte, M.J.: The case for GPGPU spatial multitasking, In: IEEE International Symposium on High-Performance Comp Architecture, pp. 1\u201312 (2012). https:\/\/doi.org\/10.1109\/HPCA.2012.6168946","key":"23_CR1","DOI":"10.1109\/HPCA.2012.6168946"},{"doi-asserted-by":"publisher","unstructured":"Xu, Q., Jeon, H., Kim, K., Ro, W.W., Annavaram, M.: Warped-slicer: efficient intra-sm slicing through dynamic resource partitioning for gpu multiprogramming. In: 2016 ACM\/IEEE 43rd Annual International Symposium on Computer Architecture (ISCA), ISCA 2016, pp. 230\u2013242 (2016). https:\/\/doi.org\/10.1109\/ISCA.2016.29","key":"23_CR2","DOI":"10.1109\/ISCA.2016.29"},{"doi-asserted-by":"publisher","unstructured":"Wang, Z., Yang, J., Melhem, R., Childers, B., Zhang, Y., Guo, M.: Simultaneous multikernel gpu: multi-tasking throughput processors via fine-grained sharing. In: IEEE International Symposium on High Performance Computer Architecture (HPCA) 2016, pp. 358\u2013369 (2016). https:\/\/doi.org\/10.1109\/HPCA.2016.7446078","key":"23_CR3","DOI":"10.1109\/HPCA.2016.7446078"},{"issue":"6","key":"23_CR4","doi-asserted-by":"publisher","first-page":"1451","DOI":"10.1109\/TPDS.2021.3115630","volume":"33","author":"C Zhao","year":"2022","unstructured":"Zhao, C., Gao, W., Nie, F., Zhou, H.: A survey of GPU multitasking methods supported by hardware architecture. IEEE Trans. Parallel Distrib. Syst. 33(6), 1451\u20131463 (2022). https:\/\/doi.org\/10.1109\/TPDS.2021.3115630","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"doi-asserted-by":"crossref","unstructured":"Zhao, X., Jahre, M., Eeckhout, L.: HSM: a Hybrid Slowdown Model for Multitasking GPUs, Association for Computing Machinery, pp. 1371\u20131385 (2020)","key":"23_CR5","DOI":"10.1145\/3373376.3378457"},{"doi-asserted-by":"publisher","unstructured":"Zhao, X., Wang, Z., Eeckhout, L.: Classification-driven search for effective SM partitioning in multitasking GPUs. In: Proceedings of the 2018 International Conference on Supercomputing, ICS 2018, pp. 65\u201375. Association for Computing Machinery, New York 2018. https:\/\/doi.org\/10.1145\/3205289.3205311","key":"23_CR6","DOI":"10.1145\/3205289.3205311"},{"issue":"1","key":"23_CR7","doi-asserted-by":"publisher","first-page":"93","DOI":"10.1109\/TPDS.2018.2854764","volume":"30","author":"X Zhao","year":"2019","unstructured":"Zhao, X., Wang, Z., Eeckhout, L.: HeteroCore GPU to exploit TLP-resource diversity. IEEE Trans. Parallel Distrib. Syst. 30(1), 93\u2013106 (2019). https:\/\/doi.org\/10.1109\/TPDS.2018.2854764","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"doi-asserted-by":"publisher","unstructured":"Thomas, W., Toraskar, S., Singh, V.: Dynamic optimizations in GPU using roofline model. In: IEEE International Symposium on Circuits and Systems (ISCAS) 2021, pp. 1\u20135 (2021). https:\/\/doi.org\/10.1109\/ISCAS51556.2021.9401255","key":"23_CR8","DOI":"10.1109\/ISCAS51556.2021.9401255"},{"doi-asserted-by":"publisher","unstructured":"Hu, Q., Shu, J., Fan, J., Lu, Y.: Run-time performance estimation and fairness-oriented scheduling policy for concurrent GPGPU applications. In: 2016 45th International Conference on Parallel Processing (ICPP), pp. 57\u201366 (2016). https:\/\/doi.org\/10.1109\/ICPP.2016.14","key":"23_CR9","DOI":"10.1109\/ICPP.2016.14"},{"doi-asserted-by":"publisher","unstructured":"Zhao, W., et al.: Themis: predicting and reining in application-level slowdown on spatial multitasking GPUs. In: IEEE International Parallel and Distributed Processing Symposium (IPDPS) 2019, pp. 653\u2013663 (2019). https:\/\/doi.org\/10.1109\/IPDPS.2019.00074","key":"23_CR10","DOI":"10.1109\/IPDPS.2019.00074"},{"issue":"3","key":"23_CR11","doi-asserted-by":"publisher","first-page":"42","DOI":"10.1109\/MM.2008.44","volume":"28","author":"S Eyerman","year":"2008","unstructured":"Eyerman, S., Eeckhout, L.: System-level performance metrics for multiprogram workloads. IEEE Micro 28(3), 42\u201353 (2008). https:\/\/doi.org\/10.1109\/MM.2008.44","journal-title":"IEEE Micro"},{"doi-asserted-by":"publisher","unstructured":"Park, J.J.K., Park, Y., Mahlke, S.: Resource management for efficient utilization of multitasking GPUs. In: Proceedings of the Twenty-Second International Conference on Architectural Support for Programming Languages and Operating Systems, ASPLOS 2017, pp. 527-540. ACM, New York (2017). https:\/\/doi.org\/10.1145\/3037697.3037707. http:\/\/doi.acm.org\/10.1145\/3037697.3037707","key":"23_CR12","DOI":"10.1145\/3037697.3037707"},{"doi-asserted-by":"publisher","unstructured":"Bakhoda, A., Yuan, G.L., Fung, W.W.L., Wong, H., Aamodt, T.M.: Analyzing CUDA workloads using a detailed GPU simulator. In: IEEE International Symposium on Performance Analysis of Systems and Software 2009, pp. 163\u2013174 (2009). https:\/\/doi.org\/10.1109\/ISPASS.2009.4919648","key":"23_CR13","DOI":"10.1109\/ISPASS.2009.4919648"},{"unstructured":"NVIDIA, Cuda sdk code samples, May 2018. https:\/\/www.nvidia.com\/object\/cuda_get_samples_3.html","key":"23_CR14"},{"doi-asserted-by":"publisher","unstructured":"Che, S., et al.: Rodinia: a benchmark suite for heterogeneous computing. In: IEEE International Symposium on Workload Characterization, 2009, IISWC 2009, pp. 44\u201354 (2009). https:\/\/doi.org\/10.1109\/IISWC.2009.5306797","key":"23_CR15","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"23_CR16","first-page":"29","volume":"127","author":"JA Stratton","year":"2012","unstructured":"Stratton, J.A., Rodrigues, C., Sung, I.-J., Obeid, N., Chang, L.-W., Anssari, N., Liu, G.D., Hwu, W.-M.W.: Parboil: a revised benchmark suite for scientific and commercial throughput computing. Center for Reliable and High-Performance Computing 127, 29 (2012)","journal-title":"Center for Reliable and High-Performance Computing"},{"doi-asserted-by":"publisher","unstructured":"G\u00f3mez-Luna, J., et al.: Collaborative heterogeneous applications for integrated-architectures, in: ISPASS, pp. 43\u201354 (2017). https:\/\/doi.org\/10.1109\/ISPASS.2017.7975269","key":"23_CR17","DOI":"10.1109\/ISPASS.2017.7975269"},{"unstructured":"Tukey, J.W.: Exploratory Data Analysis (1977)","key":"23_CR18"},{"doi-asserted-by":"publisher","unstructured":"Khairy, M., Shen, Z., Aamodt, T.M., Rogers, T.G.: Accel-sim: an extensible simulation framework for validated GPU modeling. In: 2020 ACM\/IEEE 47th Annual International Symposium on Computer Architecture (ISCA), 2020, pp. 473\u2013486. https:\/\/doi.org\/10.1109\/ISCA45697.2020.00047","key":"23_CR19","DOI":"10.1109\/ISCA45697.2020.00047"},{"doi-asserted-by":"publisher","unstructured":"Dai, H., et al.: Accelerate GPU concurrent kernel execution by mitigating memory pipeline stalls. In: IEEE International Symposium on High Performance Computer Architecture (HPCA) 2018, pp. 208\u2013220 (2018). https:\/\/doi.org\/10.1109\/HPCA.2018.00027","key":"23_CR20","DOI":"10.1109\/HPCA.2018.00027"},{"doi-asserted-by":"publisher","unstructured":"Wang, H., Luo, F., Ibrahim, M., Kayiran, O., Jog, A.: Efficient and fair multi-programming in GPUs via effective bandwidth management. In: IEEE International Symposium on High Performance Computer Architecture (HPCA) 2018, pp. 247\u2013258 (2018). https:\/\/doi.org\/10.1109\/HPCA.2018.00030","key":"23_CR21","DOI":"10.1109\/HPCA.2018.00030"},{"doi-asserted-by":"crossref","unstructured":"Zhao, X., Jahre, M., Eeckhout, L.: HSM: a hybrid slowdown model for multitasking GPUs, in: Proceedings of the Twenty-Fifth International Conference on Architectural Support for Programming Languages and Operating Systems, ASPLOS 2020, pp. 1371\u20131385. Association for Computing Machinery, New York (2020)","key":"23_CR22","DOI":"10.1145\/3373376.3378457"},{"issue":"1","key":"23_CR23","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/LCA.2018.2889042","volume":"18","author":"J Kim","year":"2019","unstructured":"Kim, J., Cha, J., Park, J.J.K., Jeon, D., Park, Y.: Improving GPU multitasking efficiency using dynamic resource sharing. IEEE Comput. Archit. Lett. 18(1), 1\u20135 (2019). https:\/\/doi.org\/10.1109\/LCA.2018.2889042","journal-title":"IEEE Comput. Archit. Lett."}],"container-title":["Lecture Notes in Computer Science","Euro-Par 2022: Parallel Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-12597-3_23","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,11]],"date-time":"2022-08-11T23:14:27Z","timestamp":1660259667000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-12597-3_23"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031125966","9783031125973"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-12597-3_23","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"1 August 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"Euro-Par","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Parallel Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Glasgow","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 August 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 August 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"europar2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2022.euro-par.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"102","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"25","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"25% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.97","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}