{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T14:31:25Z","timestamp":1742913085486,"version":"3.40.3"},"publisher-location":"Cham","reference-count":29,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031439421"},{"type":"electronic","value":"9783031439438"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-43943-8_3","type":"book-chapter","created":{"date-parts":[[2023,9,14]],"date-time":"2023-09-14T13:52:25Z","timestamp":1694699545000},"page":"46-73","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Memory-Aware Latency Prediction Model for\u00a0Concurrent Kernels in\u00a0Partitionable GPUs: Simulations and\u00a0Experiments"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2107-4178","authenticated-orcid":false,"given":"Alessio","family":"Masola","sequence":"first","affiliation":[]},{"given":"Nicola","family":"Capodieci","sequence":"additional","affiliation":[]},{"given":"Roberto","family":"Cavicchioli","sequence":"additional","affiliation":[]},{"given":"Ignacio Sanudo","family":"Olmedo","sequence":"additional","affiliation":[]},{"given":"Benjamin","family":"Rouxel","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,9,15]]},"reference":[{"key":"3_CR1","doi-asserted-by":"crossref","unstructured":"Adriaens, J.T., Compton, K., Kim, N.S., Schulte, M.J.: The case for gpgpu spatial multitasking. In: IEEE International Symposium on High-Performance Comp Architecture, pp. 1\u201312. IEEE (2012)","DOI":"10.1109\/HPCA.2012.6168946"},{"key":"3_CR2","doi-asserted-by":"crossref","unstructured":"Aguilera, P., Morrow, K., Kim, N.S.: Fair share: Allocation of gpu resources for both performance and fairness. In: 2014 IEEE 32nd International Conference on Computer Design (ICCD), pp. 440\u2013447. IEEE (2014)","DOI":"10.1109\/ICCD.2014.6974717"},{"key":"3_CR3","unstructured":"et al., A.K.: Tango: A deep neural network benchmark suite for various accelerators. CoRR abs\/ arXiv: 1901.04987 (2019)"},{"key":"3_CR4","doi-asserted-by":"crossref","unstructured":"Bak, S., Yao, G., Pellizzoni, R., Caccamo, M.: Memory-aware scheduling of multicore task sets for real-time systems. In: 2012 IEEE International Conference on Embedded and Real-Time Computing Systems and Applications, pp. 300\u2013309. IEEE (2012)","DOI":"10.1109\/RTCSA.2012.48"},{"key":"3_CR5","doi-asserted-by":"crossref","unstructured":"Capodieci, N., Cavicchioli, R., Marongiu, A.: A taxonomy of modern gpgpu programming methods: On the benefits of a unified specification. IEEE Trans. Comput.-Aided Design Integrated Circ. Syst. (2021)","DOI":"10.1109\/TCAD.2021.3082863"},{"key":"3_CR6","doi-asserted-by":"crossref","unstructured":"Che, S., et al.: Rodinia: a benchmark suite for heterogeneous computing. In: 2009 IEEE International Symposium on Workload Characterization (IISWC), pp. 44\u201354. IEEE (2009)","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"3_CR7","doi-asserted-by":"crossref","unstructured":"Dai, H., et al.: Accelerate gpu concurrent kernel execution by mitigating memory pipeline stalls. In: 2018 IEEE International Symposium on High Performance Computer Architecture (HPCA), pp. 208\u2013220. IEEE (2018)","DOI":"10.1109\/HPCA.2018.00027"},{"key":"3_CR8","doi-asserted-by":"publisher","unstructured":"Gupta, K., Stuart, J.A., Owens, J.D.: A study of persistent threads style gpu programming for gpgpu workloads. In: 2012 Innovative Parallel Computing (InPar), pp. 1\u201314 (2012). https:\/\/doi.org\/10.1109\/InPar.2012.6339596","DOI":"10.1109\/InPar.2012.6339596"},{"key":"3_CR9","doi-asserted-by":"crossref","unstructured":"Hong, S., Kim, H.: An analytical model for a gpu architecture with memory-level and thread-level parallelism awareness. In: Proceedings of the 36th Annual International Symposium on Computer Architecture, pp. 152\u2013163 (2009)","DOI":"10.1145\/1555754.1555775"},{"key":"3_CR10","doi-asserted-by":"crossref","unstructured":"Hu, Q., Shu, J., Fan, J., Lu, Y.: Run-time performance estimation and fairness-oriented scheduling policy for concurrent gpgpu applications. In: 2016 45th International Conference on Parallel Processing (ICPP), pp. 57\u201366. IEEE (2016)","DOI":"10.1109\/ICPP.2016.14"},{"key":"3_CR11","doi-asserted-by":"crossref","unstructured":"Jog, A., et al.: Application-aware memory system for fair and efficient execution of concurrent gpgpu applications. In: Proceedings of workshop on general purpose processing using GPUs, pp. 1\u20138 (2014)","DOI":"10.1145\/2588768.2576780"},{"key":"3_CR12","doi-asserted-by":"crossref","unstructured":"Jog, A., et al.: Anatomy of gpu memory system for multi-application execution. In: Proceedings of the 2015 International Symposium on Memory System, pp. 223\u2013234 (2015)","DOI":"10.1145\/2818950.2818979"},{"key":"3_CR13","doi-asserted-by":"crossref","unstructured":"Khairy, M., Shen, Z., Aamodt, T.M., Rogers, T.G.: Accel-sim: an extensible simulation framework for validated gpu modeling. In: 2020 ACM\/IEEE 47th Annual International Symposium on Computer Architecture (ISCA), pp. 473\u2013486. IEEE (2020)","DOI":"10.1109\/ISCA45697.2020.00047"},{"key":"3_CR14","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1016\/j.jpdc.2017.04.002","volume":"107","author":"E Konstantinidis","year":"2017","unstructured":"Konstantinidis, E., Cotronis, Y.: A quantitative roofline model for gpu kernel performance estimation using micro-benchmarks and hardware metric profiling. J. Parallel Distrib. Comput. 107, 37\u201356 (2017)","journal-title":"J. Parallel Distrib. Comput."},{"key":"3_CR15","doi-asserted-by":"crossref","unstructured":"Marangoz, E.C., Kang, K.D., Shin, S.: Designing gpu architecture for memory bandwidth reservation. In: 2021 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS), pp. 87\u201389. IEEE (2021)","DOI":"10.1109\/ISPASS51385.2021.00024"},{"issue":"2","key":"3_CR16","doi-asserted-by":"publisher","first-page":"285","DOI":"10.4208\/cicp.110113.010813a","volume":"15","author":"CA Navarro","year":"2014","unstructured":"Navarro, C.A., Hitschfeld-Kahler, N., Mateu, L.: A survey on parallel computing and its applications in data-parallel problems using gpu architectures. Commun. Comput. Phys. 15(2), 285\u2013329 (2014)","journal-title":"Commun. Comput. Phys."},{"key":"3_CR17","doi-asserted-by":"crossref","unstructured":"Olmedo, I.S., Capodieci, N., Martinez, J.L., Marongiu, A., Bertogna, M.: Dissecting the cuda scheduling hierarchy: a performance and predictability perspective. In: 2020 IEEE Real-Time and Embedded Technology and Applications Symposium (RTAS), pp. 213\u2013225. IEEE (2020)","DOI":"10.1109\/RTAS48715.2020.000-5"},{"key":"3_CR18","doi-asserted-by":"crossref","unstructured":"Pai, S.E.A.: Improving GPGPU concurrency with elastic kernels. In: Proceedings of the Eighteenth International Conference on Architectural Support for Programming Languages and Operating Systems, ASPLOS 2013 pp. 407\u2013418. Association for Computing Machinery, New York (2013)","DOI":"10.1145\/2451116.2451160"},{"key":"3_CR19","unstructured":"Rouxel, B., Skalistis, S., Derrien, S., Puaut, I.: Hiding communication delays in contention-free execution for spm-based multi-core architectures. In: 31st Euromicro Conference on Real-Time Systems (ECRTS 2019). Schloss Dagstuhl-Leibniz-Zentrum fuer Informatik (2019)"},{"key":"3_CR20","doi-asserted-by":"crossref","unstructured":"Silva, K.P., Arcaro, L.F., De Oliveira, R.S.: On using gev or gumbel models when applying evt for probabilistic wcet estimation. In: 2017 IEEE Real-Time Systems Symposium (RTSS), pp. 220\u2013230. IEEE (2017)","DOI":"10.1109\/RTSS.2017.00028"},{"key":"3_CR21","unstructured":"Sun, Y., Agostini, N.B., Dong, S., Kaeli, D.: Summarizing cpu and gpu design trends with product data. arXiv preprint arXiv:1911.11313 (2019)"},{"key":"3_CR22","doi-asserted-by":"crossref","unstructured":"Thomas, W., Toraskar, S., Singh, V.: Dynamic optimizations in gpu using roofline model. In: 2021 IEEE International Symposium on Circuits and Systems (ISCAS), pp. 1\u20135. IEEE (2021)","DOI":"10.1109\/ISCAS51556.2021.9401255"},{"key":"3_CR23","doi-asserted-by":"crossref","unstructured":"Ukidave, Y., Kalra, C., Kaeli, D., Mistry, P., Schaa, D.: Runtime support for adaptive spatial partitioning and inter-kernel communication on gpus. In: 2014 IEEE 26th International Symposium on Computer Architecture and High Performance Computing, pp. 168\u2013175. IEEE (2014)","DOI":"10.1109\/SBAC-PAD.2014.43"},{"key":"3_CR24","doi-asserted-by":"crossref","unstructured":"Wang, Z., Yang, J., Melhem, R., Childers, B., Zhang, Y., Guo, M.: Simultaneous multikernel gpu: Multi-tasking throughput processors via fine-grained sharing. In: 2016 IEEE International Symposium on High Performance Computer Architecture (HPCA), pp. 358\u2013369. IEEE (2016)","DOI":"10.1109\/HPCA.2016.7446078"},{"key":"3_CR25","doi-asserted-by":"crossref","unstructured":"Wartel, F., et al.: Timing analysis of an avionics case study on complex hardware\/software platforms. In: 2015 Design, Automation & Test in Europe Conference & Exhibition (DATE), pp. 397\u2013402. IEEE (2015)","DOI":"10.7873\/DATE.2015.0189"},{"issue":"4","key":"3_CR26","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1145\/1498765.1498785","volume":"52","author":"S Williams","year":"2009","unstructured":"Williams, S., Waterman, A., Patterson, D.: Roofline: an insightful visual performance model for multicore architectures. Commun. ACM 52(4), 65\u201376 (2009)","journal-title":"Commun. ACM"},{"key":"3_CR27","doi-asserted-by":"crossref","unstructured":"Xu, Q., Jeon, H., Kim, K., Ro, W.W., Annavaram, M.: Warped-slicer: efficient intra-sm slicing through dynamic resource partitioning for gpu multiprogramming. In: 2016 ACM\/IEEE 43rd Annual International Symposium on Computer Architecture (ISCA), pp. 230\u2013242. IEEE (2016)","DOI":"10.1109\/ISCA.2016.29"},{"key":"3_CR28","doi-asserted-by":"crossref","unstructured":"Yandrofski, T., Chen, J., Otterness, N., Anderson, J.H., Smith, F.D.: Making powerful enemies on nvidia gpus. In: 2022 IEEE Real-Time Systems Symposium (RTSS), pp. 383\u2013395. IEEE (2022)","DOI":"10.1109\/RTSS55097.2022.00040"},{"key":"3_CR29","doi-asserted-by":"crossref","unstructured":"Zhao, X., Jahre, M., Eeckhout, L.: Hsm: a hybrid slowdown model for multitasking gpus. In: Proceedings of the Twenty-fifth International Conference on Architectural Support for Programming Languages and Operating Systems, pp. 1371\u20131385 (2020)","DOI":"10.1145\/3373376.3378457"}],"container-title":["Lecture Notes in Computer Science","Job Scheduling Strategies for Parallel Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-43943-8_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,14]],"date-time":"2023-09-14T13:52:49Z","timestamp":1694699569000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-43943-8_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031439421","9783031439438"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-43943-8_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"15 September 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"JSSPP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Workshop on Job Scheduling Strategies for Parallel Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"St. Petersburg, FL","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"USA","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 May 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 May 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"jsspp2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"14","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"9","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"64% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.8","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.9","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}