{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T12:05:15Z","timestamp":1742990715785,"version":"3.40.3"},"publisher-location":"Cham","reference-count":35,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030499426"},{"type":"electronic","value":"9783030499433"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-49943-3_4","type":"book-chapter","created":{"date-parts":[[2020,6,24]],"date-time":"2020-06-24T07:03:45Z","timestamp":1592982225000},"page":"66-88","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Evaluation of Directive-Based GPU Programming Models on a Block Eigensolver with Consideration of Large Sparse Matrices"],"prefix":"10.1007","author":[{"given":"Fazlay","family":"Rabbi","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Christopher S.","family":"Daley","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hasan Metin","family":"Aktulga","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nicholas J.","family":"Wright","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,6,9]]},"reference":[{"key":"4_CR1","unstructured":"Cori-GPU system configuration. https:\/\/docs-dev.nersc.gov\/cgpu\/"},{"key":"4_CR2","unstructured":"Openmp specification. https:\/\/www.openmp.org\/wp-content\/uploads\/OpenMP-API-Specification-5.0.pdf"},{"key":"4_CR3","unstructured":"Summit system configuration. https:\/\/www.olcf.ornl.gov\/summit\/"},{"key":"4_CR4","unstructured":"HIP : Convert CUDA to Portable C++ Code (2019). https:\/\/github.com\/ROCm-Developer-Tools\/HIP. Accessed 4 Sept 2019"},{"key":"4_CR5","doi-asserted-by":"crossref","unstructured":"Aktulga, H.M., Bulu\u00e7, A., Williams, S., Yang, C.: Optimizing sparse matrix-multiple vectors multiplication for nuclear configuration interaction calculations. In: 2014 IEEE 28th International Parallel and Distributed Processing Symposium, pp. 1213\u20131222. IEEE (2014)","DOI":"10.1109\/IPDPS.2014.125"},{"key":"4_CR6","unstructured":"Anzt, H., Tomov, S., Dongarra, J.: Implementing a sparse matrix vector product for the SELL-C\/SELL-C-$$\\sigma $$ formats on nvidia gpus. University of Tennessee, Technical report. ut-eecs-14-727 (2014)"},{"key":"4_CR7","unstructured":"Anzt, H., Tomov, S., Dongarra, J.: Accelerating the LOBPCG method on GPUs using a blocked sparse matrix vector product. In: Proceedings of the Symposium on High Performance Computing, pp. 75\u201382. Society for Computer Simulation International (2015)"},{"key":"4_CR8","doi-asserted-by":"crossref","unstructured":"Bell, N., Garland, M.: Implementing sparse matrix-vector multiplication on throughput-oriented processors. In: Proceedings of the Conference on High Performance Computing Networking, Storage and Analysis. p. 18. ACM (2009)","DOI":"10.1145\/1654059.1654078"},{"key":"4_CR9","doi-asserted-by":"publisher","first-page":"115","DOI":"10.1145\/1837853.1693471","volume":"45","author":"JW Choi","year":"2010","unstructured":"Choi, J.W., Singh, A., Vuduc, R.W.: Model-driven autotuning of sparse matrix-vector multiply on GPUs. ACM SIGPLAN Not. 45, 115\u2013126 (2010)","journal-title":"ACM SIGPLAN Not."},{"key":"4_CR10","doi-asserted-by":"publisher","unstructured":"Cui, X., Scogland, T.R.W., de Supinski, B.R., Feng, W.: Directive-based partitioning and pipelining for graphics processing units. In: 2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS), pp. 575\u2013584, May 2017. https:\/\/doi.org\/10.1109\/IPDPS.2017.96","DOI":"10.1109\/IPDPS.2017.96"},{"key":"4_CR11","unstructured":"Davis, T., Hu, Y., Kolodziej, S.: The suitesparse matrix collection (2018). http:\/\/faculty.cse.tamu.edu\/davis\/suitesparse.html"},{"key":"4_CR12","unstructured":"Deldon, S., Beyer, J., Miles, D.: OpenACC and CUDA unified memory. Cray User Group (CUG), May 2018"},{"issue":"4","key":"4_CR13","doi-asserted-by":"publisher","first-page":"997","DOI":"10.4208\/cicp.OA-2016-0168","volume":"22","author":"A Dziekonski","year":"2017","unstructured":"Dziekonski, A., Rewienski, M., Sypek, P., Lamecki, A., Mrozowski, M.: GPU-accelerated LOBPCG method with inexact null-space filtering for solving generalized eigenvalue problems in computational electromagnetics analysis with higher-order fem. Commun. Comput. Phys. 22(4), 997\u20131014 (2017)","journal-title":"Commun. Comput. Phys."},{"key":"4_CR14","doi-asserted-by":"publisher","unstructured":"Rabbi, F., Daley, C.S., Aktulga, H.M., Wright, N.J.: Evaluation of directive-based GPU programming models on a block eigensolver with consideration of large sparse matrices (waccpd 2019 paper\u2019s artifact). https:\/\/doi.org\/10.6084\/m9.figshare.11636067, https:\/\/github.com\/fazlay-rabbi\/WACCPD_2019_Artifact","DOI":"10.6084\/m9.figshare.11636067"},{"key":"4_CR15","doi-asserted-by":"crossref","unstructured":"Garland, M.: Sparse matrix computations on manycore GPU\u2019s. In: Proceedings of the 45th annual Design Automation Conference, pp. 2\u20136. ACM (2008)","DOI":"10.1145\/1391469.1391473"},{"key":"4_CR16","doi-asserted-by":"crossref","unstructured":"Hong, C., et al.: Efficient sparse-matrix multi-vector product on GPUs. In: Proceedings of the 27th International Symposium on High-Performance Parallel and Distributed Computing, pp. 66\u201379. ACM (2018)","DOI":"10.1145\/3208040.3208062"},{"key":"4_CR17","doi-asserted-by":"crossref","unstructured":"Khorasani, F., Gupta, R., Bhuyan, L.N.: Scalable SIMD-efficient graph processing on GPUs. In: 2015 International Conference on Parallel Architecture and Compilation (PACT), pp. 39\u201350. IEEE (2015)","DOI":"10.1109\/PACT.2015.15"},{"key":"4_CR18","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s11227-019-02966-8","volume":"75","author":"M Knap","year":"2019","unstructured":"Knap, M., Czarnul, P.: Performance evaluation of unified memory with prefetching and oversubscription for selected parallel CUDA applications on NVIDIA Pascal and Volta GPUs. J. Supercomput. 75, 1\u201321 (2019)","journal-title":"J. Supercomput."},{"issue":"2","key":"4_CR19","doi-asserted-by":"publisher","first-page":"517","DOI":"10.1137\/S1064827500366124","volume":"23","author":"AV Knyazev","year":"2001","unstructured":"Knyazev, A.V.: Toward the optimal preconditioned eigensolver: locally optimal block preconditioned conjugate gradient method. SIAM J. Sci. Comput. 23(2), 517\u2013541 (2001)","journal-title":"SIAM J. Sci. Comput."},{"key":"4_CR20","unstructured":"Knyazev, A.V., Argentati, M.E.: Implementation of a preconditioned eigensolver using hypre (2005)"},{"issue":"5","key":"4_CR21","doi-asserted-by":"publisher","first-page":"2224","DOI":"10.1137\/060661624","volume":"29","author":"AV Knyazev","year":"2007","unstructured":"Knyazev, A.V., Argentati, M.E., Lashuk, I., Ovtchinnikov, E.E.: Block locally optimal preconditioned eigenvalue xolvers (BLOPEX) in HYPRE and PETSc. SIAM J. Sci. Comput. 29(5), 2224\u20132239 (2007)","journal-title":"SIAM J. Sci. Comput."},{"key":"4_CR22","doi-asserted-by":"crossref","unstructured":"Lanczos, C.: An Iteration Method for the Solution of the Eigenvalue Problem of Linear Differential and Integral Operators. United States Government Press Office, Los Angeles (1950)","DOI":"10.6028\/jres.045.026"},{"key":"4_CR23","unstructured":"Larrea, V.G.V., Budiardja, R., Gayatri, R., Daley, C., Hernandez, O., Joubert, W.: Experiences porting mini-applications to OpenACC and OpenMP on heterogeneous systems. In: Cray User Group (CUG), May 2019"},{"key":"4_CR24","first-page":"012019","volume":"403","author":"P Maris","year":"2012","unstructured":"Maris, P., et al.: Large-scale ab initio configuration interaction calculations for light nuclei. J. Phys.: Conf. Ser. 403, 012019 (2012)","journal-title":"J. Phys.: Conf. Ser."},{"issue":"1","key":"4_CR25","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1016\/j.procs.2010.04.012","volume":"1","author":"P Maris","year":"2010","unstructured":"Maris, P., Sosonkina, M., Vary, J.P., Ng, E., Yang, C.: Scaling of ab-initio nuclear physics calculations on multicore computer architectures. Procedia Comput. Sci. 1(1), 97\u2013106 (2010)","journal-title":"Procedia Comput. Sci."},{"key":"4_CR26","unstructured":"Naumov, M., Chien, L., Vandermersch, P., Kapasi, U.: cuSPARSE library. In: GPU Technology Conference (2010)"},{"issue":"7","key":"4_CR27","doi-asserted-by":"publisher","first-page":"968","DOI":"10.1093\/comjnl\/bxt038","volume":"57","author":"G Ortega","year":"2014","unstructured":"Ortega, G., V\u00e1zquez, F., Garc\u00eda, I., Garz\u00f3n, E.M.: FastSpMM: an efficient library for sparse matrix matrix product on GPUs. Comput. J. 57(7), 968\u2013979 (2014)","journal-title":"Comput. J."},{"key":"4_CR28","unstructured":"Sakharnykh, N.: Everything You Need To Know About Unified Memory. Presented at GPU Technology Conference (GTC) (2018). http:\/\/on-demand.gputechconf.com\/gtc\/2018\/presentation\/s8430-everything-you-need-to-know-about-unified-memory.pdf. Accessed Mar 2018"},{"key":"4_CR29","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.cpc.2017.09.004","volume":"222","author":"M Shao","year":"2018","unstructured":"Shao, M., Aktulga, H.M., Yang, C., Ng, E.G., Maris, P., Vary, J.P.: Accelerating nuclear configuration interaction calculations through a preconditioned block iterative eigensolver. Comput. Phys. Commun. 222, 1\u201313 (2018)","journal-title":"Comput. Phys. Commun."},{"key":"4_CR30","doi-asserted-by":"crossref","unstructured":"Sternberg, P., et al.: Accelerating configuration interaction calculations for nuclear structure. In: Proceedings of the 2008 ACM\/IEEE Conference on Supercomputing, p. 15. IEEE Press (2008)","DOI":"10.1109\/SC.2008.5220090"},{"key":"4_CR31","unstructured":"Vazhkudai, S.S., et al.: The design, deployment, and evaluation of the coral pre-exascale systems. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage, and Analysis, p. 52. IEEE Press (2018)"},{"key":"4_CR32","series-title":"Communications in Computer and Information Science","doi-asserted-by":"publisher","first-page":"100","DOI":"10.1007\/978-981-13-7025-0_10","volume-title":"Geo-informatics in Sustainable Ecosystem and Society","author":"Y Wang","year":"2019","unstructured":"Wang, Y.: Research on matrix multiplication based on the combination of OpenACC and CUDA. In: Xie, Y., Zhang, A., Liu, H., Feng, L. (eds.) GSES 2018. CCIS, vol. 980, pp. 100\u2013108. Springer, Singapore (2019). https:\/\/doi.org\/10.1007\/978-981-13-7025-0_10"},{"key":"4_CR33","doi-asserted-by":"crossref","unstructured":"Williams, S., Waterman, A., Patterson, D.: Roofline: an insightful visual performance model for floating-point programs and multicore architectures. Technical report, Lawrence Berkeley National Lab (LBNL), Berkeley, CA, USA (2009)","DOI":"10.2172\/1407078"},{"key":"4_CR34","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"672","DOI":"10.1007\/978-3-319-96983-1_48","volume-title":"Euro-Par 2018: Parallel Processing","author":"C Yang","year":"2018","unstructured":"Yang, C., Bulu\u00e7, A., Owens, J.D.: Design principles for sparse matrix multiplication on the GPU. In: Aldinucci, M., Padovani, L., Torquati, M. (eds.) Euro-Par 2018. LNCS, vol. 11014, pp. 672\u2013687. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-319-96983-1_48"},{"issue":"4","key":"4_CR35","doi-asserted-by":"publisher","first-page":"231","DOI":"10.14778\/1938545.1938548","volume":"4","author":"X Yang","year":"2011","unstructured":"Yang, X., Parthasarathy, S., Sadayappan, P.: Fast sparse matrix-vector multiplication on GPUs: implications for graph mining. Proc. VLDB Endow. 4(4), 231\u2013242 (2011)","journal-title":"Proc. VLDB Endow."}],"container-title":["Lecture Notes in Computer Science","Accelerator Programming Using Directives"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-49943-3_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,1,12]],"date-time":"2021-01-12T10:49:17Z","timestamp":1610448557000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-49943-3_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030499426","9783030499433"],"references-count":35,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-49943-3_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"9 June 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Artifact Availability"}},{"value":"All author-created software artifacts are maintained in a public repository under an OSI-approved license.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Software Artifact Availability"}},{"value":"All author-created hardware artifacts are maintained in a public repository under an OSI-approved license.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Hardware Artifact Availability"}},{"value":"All author-created data artifacts are maintained in a public repository under an OSI-approved license.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Data Artifact Availability"}},{"value":"None of the associated artifacts, author-created or otherwise, are proprietary.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Proprietary Artifacts"}},{"value":"<tt>10.6084\/m9.figshare.<\/tt><tt>11636067<\/tt>. The details of the baseline experimental setup, and modifications made for the paper are also available at \u00a0[].","order":6,"name":"Ethics","group":{"name":"EthicsHeading","label":"List of URLs and\/or DOIs Where Artifacts are Available"}},{"value":"WACCPD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Workshop on Accelerator Programming Using Directives","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Denver, CO","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"USA","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2019","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 November 2019","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 November 2019","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"waccpd2019","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/waccpd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Linklings","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"13","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"54% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5.29","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1.83","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}