{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,12]],"date-time":"2026-04-12T18:23:29Z","timestamp":1776018209974,"version":"3.50.1"},"publisher-location":"Cham","reference-count":12,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783319642024","type":"print"},{"value":"9783319642031","type":"electronic"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-64203-1_37","type":"book-chapter","created":{"date-parts":[[2017,7,31]],"date-time":"2017-07-31T15:03:35Z","timestamp":1501513415000},"page":"511-522","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Optimized Batched Linear Algebra for Modern Architectures"],"prefix":"10.1007","author":[{"given":"Jack","family":"Dongarra","sequence":"first","affiliation":[]},{"given":"Sven","family":"Hammarling","sequence":"additional","affiliation":[]},{"given":"Nicholas J.","family":"Higham","sequence":"additional","affiliation":[]},{"given":"Samuel D.","family":"Relton","sequence":"additional","affiliation":[]},{"given":"Mawussi","family":"Zounon","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,8,1]]},"reference":[{"key":"37_CR1","unstructured":"Abadi, M., Agarwal, A., Barham, P., Brevdo, E., et al.: TensorFlow: large-scale machine learning on heterogeneous systems (2015). tensorflow.org"},{"key":"37_CR2","doi-asserted-by":"crossref","unstructured":"Abdelfattah, A., Haidar, A., Tomov, S., Dongarra, J.J.: Performance, design, and autotuning of batched GEMM for GPUs. In: Proceedings of High Performance Computing - 31st International Conference, ISC High Performance 2016, Frankfurt, Germany, 19\u201323 June 2016, pp. 21\u201338 (2016)","DOI":"10.1007\/978-3-319-41321-1_2"},{"key":"37_CR3","unstructured":"Al-Rfou, R., Alain, G., Almahairi, A., Angermueller, C., Bahdanau, D., et al.: Theano: a python framework for fast computation of mathematical expressions. arXiv e-prints, http:\/\/arxiv.org\/abs\/1605.02688, May 2016"},{"key":"37_CR4","doi-asserted-by":"crossref","unstructured":"Anderson, M.J., Sheffield, D., Keutzer, K.: A predictive model for solving small linear algebra problems in GPU registers. In: 2012 IEEE 26th International Parallel and Distributed Processing Symposium (IPDPS), pp. 2\u201313. IEEE (2012)","DOI":"10.1109\/IPDPS.2012.11"},{"issue":"3","key":"37_CR5","doi-asserted-by":"publisher","first-page":"302","DOI":"10.1145\/356044.356047","volume":"9","author":"I Duff","year":"1983","unstructured":"Duff, I., Reid, J.K.: The multifrontal solution of indefinite sparse symmetric linear equations. ACM Trans. Math. Softw. 9(3), 302\u2013325 (1983)","journal-title":"ACM Trans. Math. Softw."},{"key":"37_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1007\/978-3-319-20119-1_3","volume-title":"High Performance Computing","author":"A Haidar","year":"2015","unstructured":"Haidar, A., Dong, T.T., Tomov, S., Luszczek, P., Dongarra, J.: A framework for batched and GPU-resident factorization algorithms applied to block householder transformations. In: Kunkel, J.M., Ludwig, T. (eds.) ISC High Performance 2015. LNCS, vol. 9137, pp. 31\u201347. Springer, Cham (2015). doi:10.1007\/978-3-319-20119-1_3"},{"key":"37_CR7","doi-asserted-by":"publisher","first-page":"133","DOI":"10.1016\/j.jpdc.2014.09.003","volume":"75","author":"C Jhurani","year":"2015","unstructured":"Jhurani, C., Mullowney, P.: A gemm interface and implementation on NVIDIA GPUs for multiple small matrices. J. Parallel Distrib. Comput. 75, 133\u2013140 (2015)","journal-title":"J. Parallel Distrib. Comput."},{"issue":"3","key":"37_CR8","doi-asserted-by":"publisher","first-page":"268","DOI":"10.1145\/292395.292412","volume":"24","author":"B K\u00e5gstr\u00f6m","year":"1998","unstructured":"K\u00e5gstr\u00f6m, B., Ling, P., van Loan, C.: GEMM-based level 3 BLAS: high-performance model implementations and performance evaluation benchmark. ACM Trans. Math. Softw. 24(3), 268\u2013302 (1998)","journal-title":"ACM Trans. Math. Softw."},{"key":"37_CR9","doi-asserted-by":"publisher","first-page":"45","DOI":"10.1007\/978-3-319-06548-9_3","volume-title":"Numerical Computations with GPUs","author":"MG Lopez","year":"2014","unstructured":"Lopez, M.G., Horton, M.D.: Batch matrix exponentiation. In: Kindratenko, V. (ed.) Numerical Computations with GPUs, pp. 45\u201367. Springer, Cham (2014). doi:10.1007\/978-3-319-06548-9_3"},{"key":"37_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"659","DOI":"10.1007\/978-3-319-43659-3_48","volume-title":"Euro-Par 2016: Parallel Processing","author":"I Masliah","year":"2016","unstructured":"Masliah, I., Abdelfattah, A., Haidar, A., Tomov, S., Baboulin, M., Falcou, J., Dongarra, J.: High-performance matrix-matrix multiplications of very small matrices. In: Dutot, P.-F., Trystram, D. (eds.) Euro-Par 2016. LNCS, vol. 9833, pp. 659\u2013671. Springer, Cham (2016). doi:10.1007\/978-3-319-43659-3_48"},{"key":"37_CR11","unstructured":"Relton, S.D., Valero-Lara, P., Zounon, M.: A comparison of potential interfaces for batched BLAS computations. MIMS EPrint 2016.42, Manchester Institute for Mathematical Sciences, The University of Manchester, UK (2016)"},{"key":"37_CR12","doi-asserted-by":"crossref","unstructured":"Shi, Y., Niranjan, U.N., Anandkumar, A., Cecka, C.: Tensor contractions with extended BLAS kernels on CPU and GPU. arXiv preprint arXiv:1606.05696 (2016)","DOI":"10.1109\/HiPC.2016.031"}],"container-title":["Lecture Notes in Computer Science","Euro-Par 2017: Parallel Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-64203-1_37","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,8,1]],"date-time":"2021-08-01T00:07:41Z","timestamp":1627776461000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-64203-1_37"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319642024","9783319642031"],"references-count":12,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-64203-1_37","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017]]},"assertion":[{"value":"1 August 2017","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"Euro-Par","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Parallel Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Santiago de Compostela","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Spain","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2017","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2017","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 September 2017","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"europar2017","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/europar2017.usc.es","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}