{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T08:32:27Z","timestamp":1777105947743,"version":"3.51.4"},"publisher-location":"Cham","reference-count":33,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030499426","type":"print"},{"value":"9783030499433","type":"electronic"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-49943-3_7","type":"book-chapter","created":{"date-parts":[[2020,6,24]],"date-time":"2020-06-24T07:03:45Z","timestamp":1592982225000},"page":"140-163","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":16,"title":["A Portable SIMD Primitive Using Kokkos for Heterogeneous Architectures"],"prefix":"10.1007","author":[{"given":"Damodar","family":"Sahasrabudhe","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Eric T.","family":"Phipps","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sivasankaran","family":"Rajamanickam","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Martin","family":"Berzins","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,6,9]]},"reference":[{"key":"7_CR1","doi-asserted-by":"publisher","first-page":"693","DOI":"10.1016\/j.energy.2018.07.031","volume":"160","author":"W Adamczyk","year":"2018","unstructured":"Adamczyk, W., et al.: Application of LES-CFD for predicting pulverized-coal working conditions after installation of NOx control system. Energy 160, 693\u2013709 (2018)","journal-title":"Energy"},{"key":"7_CR2","unstructured":"Berzins, M., et al.: Extending the Uintah framework through the petascale modeling of detonation in arrays of high explosive devices. SIAM J. Sci. Comput. 38, 101\u2013122 (2016). http:\/\/www.sci.utah.edu\/publications\/Ber2015a\/detonationsiam16-2.pdf"},{"key":"7_CR3","unstructured":"Carr, S.: Combining optimization for cache and instruction-level parallelism. In: Proceedings of the 1996 Conference on Parallel Architectures and Compilation Technique, pp. 238\u2013247. IEEE (1996)"},{"key":"7_CR4","unstructured":"Cope, B., et al.: Implementation of 2D Convolution on FPGA, GPU and CPU. Imperial College Report, pp. 2\u20135 (2006)"},{"issue":"12","key":"7_CR5","doi-asserted-by":"publisher","first-page":"3202","DOI":"10.1016\/j.jpdc.2014.07.003","volume":"74","author":"H Edwards","year":"2014","unstructured":"Edwards, H., Trott, C., Sunderland, D.: Kokkos: enabling manycore performance portability through polymorphic memory access patterns. J. Parallel Distrib. Comput. 74(12), 3202\u20133216 (2014)","journal-title":"J. Parallel Distrib. Comput."},{"key":"7_CR6","unstructured":"U.S. Department of Energy: U.S. Department of Energy and Cray to Deliver Record-Setting Frontier Supercomputer at ORNL. https:\/\/www.energy.gov\/articles\/us-department-energy-and-cray-deliver-record-setting-frontier-supercomputer-ornl (2019)"},{"issue":"5","key":"7_CR7","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1109\/40.621210","volume":"17","author":"R Espasa","year":"1997","unstructured":"Espasa, R., Valero, M.: Exploiting instruction-and data-level parallelism. IEEE Micro 17(5), 20\u201327 (1997)","journal-title":"IEEE Micro"},{"key":"7_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"225","DOI":"10.1007\/978-3-642-19861-8_13","volume-title":"Compiler Construction","author":"T Henretty","year":"2011","unstructured":"Henretty, T., Stock, K., Pouchet, L.-N., Franchetti, F., Ramanujam, J., Sadayappan, P.: Data layout transformation for stencil computations on short-vector SIMD architectures. In: Knoop, J. (ed.) CC 2011. LNCS, vol. 6601, pp. 225\u2013245. Springer, Heidelberg (2011). https:\/\/doi.org\/10.1007\/978-3-642-19861-8_13"},{"issue":"6","key":"7_CR9","doi-asserted-by":"publisher","first-page":"371","DOI":"10.1145\/2345156.2254108","volume":"47","author":"J Holewinski","year":"2012","unstructured":"Holewinski, J., et al.: Dynamic trace-based analysis of vectorization potential of applications. ACM SIGPLAN Not. 47(6), 371\u2013382 (2012)","journal-title":"ACM SIGPLAN Not."},{"key":"7_CR10","unstructured":"Holmen, J.: Private communication (2018)"},{"key":"7_CR11","unstructured":"Holmen, J.K., et al.: Portably improving uintah\u2019s readiness for exascale systems through the use of kokkos. SCI Institute (2019). http:\/\/www.sci.utah.edu\/publications\/Hol2019a\/UUSCI-2019-001.pdf"},{"key":"7_CR12","doi-asserted-by":"crossref","unstructured":"Hornung, R., Keasler, J.: The RAJA portability layer: overview and status. Technical report, Lawrence Livermore National Laboratories (LLNL), Livermore, CA, United States (2014)","DOI":"10.2172\/1169830"},{"key":"7_CR13","unstructured":"Howard, M., et al.: Employing multiple levels of parallelism for CFD at large scales on next generation high-performance computing platforms. In: 2018 Proceedings of the Tenth International Conference on Computational Fluid Dynamics (ICCFD 10), Barcelona, 9\u201313 July 2018"},{"key":"7_CR14","unstructured":"Intel: Requirements for Vectorizable Loops (2012). https:\/\/software.intel.com\/en-us\/articles\/requirements-for-vectorizable-loops"},{"key":"7_CR15","unstructured":"Jacob, A., et al.: Towards performance portable GPU programming with RAJA. In: Workshop on Portability Among HPC Architectures for Scientific Applications (2015)"},{"key":"7_CR16","volume-title":"Intel Xeon Phi Processor High Performance Programming: Knights Landing Edition","author":"J Jeffers","year":"2016","unstructured":"Jeffers, J., Reinders, J., Sodani, A.: Intel Xeon Phi Processor High Performance Programming: Knights Landing Edition. Morgan Kaufmann, Burlington (2016)"},{"key":"7_CR17","doi-asserted-by":"crossref","unstructured":"Karpi\u0144ski, P., McDonald, J.: A high-performance portable abstract interface for explicit SIMD vectorization. In: Proceedings of the 8th International Workshop on Programming Models and Applications for Multicores and Manycores. ACM (2017)","DOI":"10.1145\/3026937.3026939"},{"key":"7_CR18","doi-asserted-by":"crossref","unstructured":"Kim, K., et al.: Designing vector-friendly compact BLAS and LAPACK kernels. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, p. 55. ACM (2017)","DOI":"10.1145\/3126908.3126941"},{"key":"7_CR19","unstructured":"Kim, K., et al.: KokkosKernels v. 0.9, Version 00 (2 2017). https:\/\/www.osti.gov\/\/servlets\/purl\/1349511"},{"issue":"11","key":"7_CR20","doi-asserted-by":"publisher","first-page":"1409","DOI":"10.1002\/spe.1149","volume":"42","author":"M Kretz","year":"2012","unstructured":"Kretz, M., Lindenstruth, V.: Vc: a C++ library for explicit vectorization. Softw. Pract. Exp. 42(11), 1409\u20131430 (2012)","journal-title":"Softw. Pract. Exp."},{"issue":"8","key":"7_CR21","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1145\/2370036.2145825","volume":"47","author":"R Lei\u00dfa","year":"2012","unstructured":"Lei\u00dfa, R., Hack, S., Wald, I.: Extending a C-like language for portable SIMD programming. ACM SIGPLAN Not. 47(8), 65\u201374 (2012)","journal-title":"ACM SIGPLAN Not."},{"key":"7_CR22","unstructured":"Medina, D., St-Cyr, A., Warburton, T.: OCCA: A unified approach to multi-threading languages. arXiv preprint arXiv:1403.0968 (2014)"},{"key":"7_CR23","unstructured":"IT Peer Network: Think Exponential: Intel\u2019s Xe Architecture. https:\/\/itpeernetwork.intel.com\/intel-xe-compute#gs.emsehp (2019)"},{"key":"7_CR24","unstructured":"Opencl, K., Munshi, A.: The openCL specification version: 1.0 document revision: 48, 23 (2008). https:\/\/www.khronos.org\/registry\/OpenCL\/specs\/opencl-1.0.pdf"},{"key":"7_CR25","doi-asserted-by":"publisher","first-page":"23","DOI":"10.1016\/j.ijmultiphaseflow.2014.03.002","volume":"63","author":"J Pedel","year":"2014","unstructured":"Pedel, J., Thornock, J., Smith, S., Smith, P.: Large eddy simulation of polydisperse particles in turbulent coaxial jets using the direct quadrature method of moments. Int. J. Multiph. Flow 63, 23\u201338 (2014). https:\/\/doi.org\/10.1016\/j.ijmultiphaseflow.2014.03.002","journal-title":"Int. J. Multiph. Flow"},{"key":"7_CR26","unstructured":"Pai, S., Govindarajan, R., Thazhuthaveetil, M.: PLASMA: portable programming for SIMD heterogeneous accelerators. In: Workshop on Language, Compiler, and Architecture Support for GPGPU, held in conjunction with HPCA\/PPoPP (2010)"},{"issue":"2","key":"7_CR27","doi-asserted-by":"publisher","first-page":"C162","DOI":"10.1137\/15M1044679","volume":"39","author":"E Phipps","year":"2017","unstructured":"Phipps, E., D\u2019Elia, M., Edwards, H., Hoemmen, M., Hu, J., Rajamanickam, S.: Embedded ensemble propagation for improving performance, portability, and scalability of uncertainty quantification on emerging computational architectures. SIAM J. Sci. Comput. 39(2), C162\u2013C193 (2017)","journal-title":"SIAM J. Sci. Comput."},{"key":"7_CR28","unstructured":"Phipps, E., Tuminaro, R., Miller, C.: Stokhos: trilinos tools for embedded stochastic-galerkin uncertainty quantification methods. Technical report, Sandia National Laboratories (SNL-NM), Albuquerque, NM, United States (2008)"},{"issue":"2","key":"7_CR29","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1109\/MM.2017.35","volume":"37","author":"N Stephens","year":"2017","unstructured":"Stephens, N., et al.: The ARM scalable vector extension. IEEE Micro 37(2), 26\u201339 (2017)","journal-title":"IEEE Micro"},{"key":"7_CR30","doi-asserted-by":"crossref","unstructured":"Tian, X., et al.: LLVM compiler implementation for explicit parallelization and SIMD vectorization. In: Proceedings of the Fourth Workshop on the LLVM Compiler Infrastructure in HPC, p. 4. ACM (2017)","DOI":"10.1145\/3148173.3148191"},{"key":"7_CR31","unstructured":"Trott, C.R.: Kokkos: the C++ performance portability programming model. Technical report, Sandia National Laboratories (SNL-NM), Albuquerque, NM, United States (2017)"},{"key":"7_CR32","doi-asserted-by":"crossref","unstructured":"Wang, H., Wu, P., Tanase, I., Serrano, M., Moreira, J.: Simple, portable and fast SIMD intrinsic programming: generic simd library. In: Proceedings of the 2014 Workshop on Programming Models for SIMD\/Vector Processing. ACM (2014)","DOI":"10.1145\/2568058.2568059"},{"key":"7_CR33","doi-asserted-by":"crossref","unstructured":"Zenker, E., et al.: Alpaka-an abstraction library for parallel kernel acceleration. In: 2016 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW), pp. 631\u2013640. IEEE (2016)","DOI":"10.1109\/IPDPSW.2016.50"}],"container-title":["Lecture Notes in Computer Science","Accelerator Programming Using Directives"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-49943-3_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,1,11]],"date-time":"2021-01-11T09:03:33Z","timestamp":1610355813000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-49943-3_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030499426","9783030499433"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-49943-3_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"9 June 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"WACCPD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Workshop on Accelerator Programming Using Directives","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Denver, CO","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"USA","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2019","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 November 2019","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 November 2019","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"waccpd2019","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/waccpd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Linklings","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"13","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"54% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5.29","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1.83","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}