{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,10]],"date-time":"2026-01-10T07:46:05Z","timestamp":1768031165241,"version":"3.49.0"},"publisher-location":"Cham","reference-count":19,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030495558","type":"print"},{"value":"9783030495565","type":"electronic"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-49556-5_1","type":"book-chapter","created":{"date-parts":[[2020,6,8]],"date-time":"2020-06-08T23:05:06Z","timestamp":1591657506000},"page":"3-19","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Performance Analysis of GPU Programming Models Using the Roofline Scaling Trajectories"],"prefix":"10.1007","author":[{"given":"Khaled Z.","family":"Ibrahim","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Samuel","family":"Williams","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Leonid","family":"Oliker","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,6,9]]},"reference":[{"issue":"6","key":"1_CR1","doi-asserted-by":"crossref","first-page":"685","DOI":"10.1002\/cpe.1553","volume":"22","author":"L Adhianto","year":"2010","unstructured":"Adhianto, L., et al.: HPCToolkit: tools for performance analysis of optimized parallel programs. Concurr. Comput. Pract. Exp. 22(6), 685\u2013701 (2010). http:\/\/hpctoolkit.org","journal-title":"Concurr. Comput. Pract. Exp."},{"key":"1_CR2","unstructured":"Bailey, D., Harris, T., Saphir, W., Van Der Wijngaart, R., Woo, A., Yarrow, M.: The NAS parallel benchmarks 2.0. Technical report NAS-95-010, NASA Ames Research Center (1995)"},{"key":"1_CR3","doi-asserted-by":"crossref","unstructured":"Calotoiu, A., Hoefler, T., Poke, M., Wolf, F.: Using automated performance modeling to find scalability bugs in complex codes. In: SC 2013 Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis, pp. 1\u201312 (2013)","DOI":"10.1145\/2503210.2503277"},{"key":"1_CR4","doi-asserted-by":"crossref","unstructured":"Yang, C., Kurth, T., Williams, S.: Hierarchical Roofline analysis for GPUs: accelerating performance optimization for the NERSC-9 Perlmutter system. Cray User Group (CUG), May 2019","DOI":"10.1002\/cpe.5547"},{"key":"1_CR5","doi-asserted-by":"crossref","unstructured":"Chatterjee, N., O\u2019Connor, M., Loh, G.H., Jayasena, N., Balasubramonia, R.: Managing DRAM latency divergence in irregular GPGPU applications. In: SC 2014 Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 128\u2013139 (2014)","DOI":"10.1109\/SC.2014.16"},{"key":"1_CR6","volume-title":"CUDA Programming: A Developer\u2019s Guide to Parallel Computing with GPUs","author":"S Cook","year":"2013","unstructured":"Cook, S.: CUDA Programming: A Developer\u2019s Guide to Parallel Computing with GPUs, 1st edn. Morgan Kaufmann Publishers Inc., San Francisco (2013)","edition":"1"},{"key":"1_CR7","unstructured":"Cray: The Cray Performance Measurement and Analysis Tools. https:\/\/pubs.cray.com\/content\/S-2376\/6.4.0\/cray-performance-measurement-and-analysis-tools-user-guide-640\/craypat"},{"issue":"1","key":"1_CR8","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1109\/L-CA.2013.6","volume":"13","author":"A Ilic","year":"2014","unstructured":"Ilic, A., Pratas, F., Sousa, L.: Cache-aware Roofline model: upgrading the loft. IEEE Comput. Archit. Lett. 13(1), 21\u201324 (2014)","journal-title":"IEEE Comput. Archit. Lett."},{"key":"1_CR9","unstructured":"D\u00fcmmler, J.: A CUDA version of NPB 3.3.1. https:\/\/www.tu-chemnitz.de\/informatik\/PI\/sonstiges\/downloads\/npb-gpu\/index.php.en"},{"key":"1_CR10","doi-asserted-by":"crossref","unstructured":"Ibrahim, K., Williams, S., Oliker, L.: Roofline scaling trajectories: a method for parallel application and architectural performance analysis. In: International Conference on High Performance Computing & Simulation (HPCS) (2018)","DOI":"10.1109\/HPCS.2018.00065"},{"key":"1_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"317","DOI":"10.1007\/978-3-642-23178-0_28","volume-title":"Parallel Computing Technologies","author":"A Marowka","year":"2011","unstructured":"Marowka, A.: On performance analysis of a multithreaded application parallelized by different programming models using Intel VTune. In: Malyshkin, V. (ed.) PaCT 2011. LNCS, vol. 6873, pp. 317\u2013331. Springer, Heidelberg (2011). https:\/\/doi.org\/10.1007\/978-3-642-23178-0_28"},{"key":"1_CR12","unstructured":"Measuring Roofline Quantities on NVIDIA GPUs: Portability Across DOE Office of Science HPC Facilities. https:\/\/performanceportability.org\/perfport\/measurements\/gpu\/"},{"key":"1_CR13","unstructured":"nVidia: CUDA Profiler Users Guide. https:\/\/docs.nvidia.com\/cuda\/pdf\/CUDA_Profiler_Users_Guide.pdf"},{"key":"1_CR14","unstructured":"nVidia: NVIDIA Tesla V100 GPU Architecture. https:\/\/images.nvidia.com\/content\/volta-architecture\/pdf\/volta-architecture-whitepaper.pdf"},{"key":"1_CR15","unstructured":"OpenACC STANDARD Organization: OpenACC Application Programming Interface. https:\/\/www.openacc.org"},{"issue":"2","key":"1_CR16","doi-asserted-by":"publisher","first-page":"287","DOI":"10.1177\/1094342006064482","volume":"20","author":"SS Shende","year":"2006","unstructured":"Shende, S.S., Malony, A.D.: The tau parallel performance system. Int. J. High Perform. Comput. Appl. 20(2), 287\u2013311 (2006)","journal-title":"Int. J. High Perform. Comput. Appl."},{"key":"1_CR17","unstructured":"Top 500 Supercomputers. http:\/\/www.top500.org"},{"issue":"4","key":"1_CR18","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1145\/1498765.1498785","volume":"52","author":"S Williams","year":"2009","unstructured":"Williams, S., Watterman, A., Patterson, D.: Roofline: an insightful visual performance model for multicore architectures. Commun. ACM 52(4), 65\u201376 (2009). https:\/\/doi.org\/10.1145\/1498765.1498785","journal-title":"Commun. ACM"},{"key":"1_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1007\/978-3-319-17473-0_5","volume-title":"Languages and Compilers for Parallel Computing","author":"R Xu","year":"2015","unstructured":"Xu, R., Tian, X., Chandrasekaran, S., Yan, Y., Chapman, B.: NAS parallel benchmarks for GPGPUs using a directive-based programming model. In: Brodman, J., Tu, P. (eds.) LCPC 2014. LNCS, vol. 8967, pp. 67\u201381. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-17473-0_5"}],"container-title":["Lecture Notes in Computer Science","Benchmarking, Measuring, and Optimizing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-49556-5_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,2]],"date-time":"2023-10-02T04:29:46Z","timestamp":1696220986000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-49556-5_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030495558","9783030495565"],"references-count":19,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-49556-5_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"9 June 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"Bench","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Symposium on Benchmarking, Measuring and Optimization","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Denver, CO","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"USA","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2019","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 November 2019","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 November 2019","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"bench2019","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.benchcouncil.org\/bench19\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"79","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"20","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"11","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"25% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"10","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}