{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T02:24:28Z","timestamp":1743042268346,"version":"3.40.3"},"publisher-location":"Cham","reference-count":24,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319527086"},{"type":"electronic","value":"9783319527093"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-52709-3_5","type":"book-chapter","created":{"date-parts":[[2017,1,23]],"date-time":"2017-01-23T07:13:25Z","timestamp":1485155605000},"page":"55-70","source":"Crossref","is-referenced-by-count":0,"title":["Energy Avoiding Matrix Multiply"],"prefix":"10.1007","author":[{"given":"Kelly","family":"Livingston","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Aaron","family":"Landwehr","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jos\u00e9","family":"Monsalve","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"St\u00e9phane","family":"Zuckerman","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Beno\u00eet","family":"Meister","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guang R.","family":"Gao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2017,1,24]]},"reference":[{"issue":"1","key":"5_CR1","first-page":"012037","volume":"180","author":"E Agullo","year":"2009","unstructured":"Agullo, E., Demmel, J., Dongarra, J., Hadri, B., Kurzak, J., Langou, J., Ltaief, H., Luszczek, P., Tomov, S.: Numerical linear algebra on emerging architectures: the plasma and magma projects. J. Phys.: Conf. Ser. 180(1), 012037 (2009)","journal-title":"J. Phys.: Conf. Ser."},{"key":"5_CR2","doi-asserted-by":"crossref","first-page":"17","DOI":"10.1016\/j.procs.2012.04.003","volume":"9","author":"M Baboulin","year":"2012","unstructured":"Baboulin, M., Donfack, S., Dongarra, J., Grigori, L., R\u00e9my, A., Tomov, S.: A class of communication-avoiding algorithms for solving general dense linear systems on CPU\/GPU parallel machines. Procedia Comput. Sci. 9, 17\u201326 (2012). Proceedings of the International Conference on Computational Science, ICCS 2012","journal-title":"Procedia Comput. Sci."},{"issue":"23","key":"5_CR3","doi-asserted-by":"crossref","first-page":"301","DOI":"10.1016\/j.laa.2006.03.018","volume":"417","author":"M Bader","year":"2006","unstructured":"Bader, M., Zenger, C.: Cache oblivious matrix multiplication using an element ordering based on a Peano curve. Linear Algebra Appl. 417(23), 301\u2013313 (2006). Special Issue in Honor of Friedrich Ludwig Bauer","journal-title":"Linear Algebra Appl."},{"key":"5_CR4","doi-asserted-by":"crossref","unstructured":"Ballard, G., Demmel, J., Lipshitz, B., Schwartz, O., Toledo, S.: Communication efficient Gaussian elimination with partial pivoting using a shape morphing data layout. In: SPAA 2013, Montr\u00e9al, Qu\u00e9bec, Canada. ACM (2013)","DOI":"10.1145\/2486159.2486198"},{"key":"5_CR5","doi-asserted-by":"crossref","unstructured":"Borkar, S.: Role of interconnects in the future of computing. J. Lightwave Technol. 31(24) (2013). ISSN: 0733-8724","DOI":"10.1109\/JLT.2013.2283277"},{"key":"5_CR6","doi-asserted-by":"crossref","unstructured":"Carter, N.P., Agrawal, A., Borkar, S., Cledat, R., David, H., Dunning, D., Fryman, J.B., Ganev, I., Golliver, R.A., Knauerhase, R.C., et al.: Runnemede: an architecture for ubiquitous high-performance computing. In: HPCA (2013)","DOI":"10.1109\/HPCA.2013.6522319"},{"key":"5_CR7","doi-asserted-by":"crossref","unstructured":"Chatterjee, S., Lebeck, A.R., Patnala, P.K., Thottethodi, M.: Recursive array layouts and fast parallel matrix multiplication. In: SPAA, Saint Malo, France. ACM (1999)","DOI":"10.1145\/305619.305645"},{"key":"5_CR8","doi-asserted-by":"crossref","unstructured":"Chen, G., Anders, M., Kaul, H., Satpathy, S., Mathew, S., Hsu, S., Agarwal, A., Krishnamurthy, R., Borkar, S., De, V.: 16.1 a 340mv-to-0.9v 20.2tb\/s source-synchronous hybrid packet\/circuit-switched $$16\\times 16$$ network-on-chip in 22nm tri-gate CMOS. In: 2014 IEEE International Solid-State Circuits Conference Digest of Technical Papers (ISSCC) (2014)","DOI":"10.1109\/ISSCC.2014.6757432"},{"issue":"10","key":"5_CR9","doi-asserted-by":"crossref","first-page":"2130","DOI":"10.1016\/j.ins.2006.12.003","volume":"177","author":"K-L Chung","year":"2007","unstructured":"Chung, K.-L., Huang, Y.-L., Liu, Y.-W.: Efficient algorithms for coding Hilbert curve of arbitrary-sized image and application to window query. Inf. Sci. 177(10), 2130\u20132151 (2007). Including Special Issue on Hybrid Intelligent Systems","journal-title":"Inf. Sci."},{"key":"5_CR10","doi-asserted-by":"crossref","unstructured":"D\u2019alberto, P., Bodrato, M., Nicolau, A.: Exploiting parallelism in matrix-computation kernels for symmetric multiprocessor systems: matrix-multiplication and matrix-addition algorithm optimizations by software pipelining and threads allocation. ACM Trans. Math. Softw. 38(1) (2011)","DOI":"10.1145\/2049662.2049664"},{"key":"5_CR11","doi-asserted-by":"crossref","unstructured":"Demmel, J.: Communication-avoiding algorithms for linear algebra and beyond. In: IPDPS 2013 (2013)","DOI":"10.1109\/IPDPS.2013.123"},{"key":"5_CR12","doi-asserted-by":"crossref","unstructured":"Demmel, J., Eliahu, D., Fox, A., Kamil, S., Lipshitz, B., Schwartz, O., Spillinger, O.: Communication-optimal parallel recursive rectangular matrix multiplication. In: IPDPS (2013)","DOI":"10.1109\/IPDPS.2013.80"},{"key":"5_CR13","doi-asserted-by":"crossref","unstructured":"Frigo, M., Leiserson, C.E., Prokop, H., Ramachandran, S.: Cache-oblivious algorithms. In: Proceedings of the 40th Annual Symposium on Foundations of Computer Science, FOCS 1999, Washington, DC, USA. IEEE Computer Society (1999)","DOI":"10.1109\/SFFCS.1999.814600"},{"key":"5_CR14","doi-asserted-by":"crossref","unstructured":"Garcia, E., Orozco, D., Khan, R., Venetis, I., Livingston, K., G. Gao.: A dynamic schema to increase performance in many-core architectures through Percolation operations. In: HiPC 2013, Bangalore, India. IEEE Computer Society (2013)","DOI":"10.1109\/HiPC.2013.6799134"},{"key":"5_CR15","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"36","DOI":"10.1007\/3-540-47789-6_4","volume-title":"Computational Science \u2014 ICCS 2002","author":"J Hungersh\u00f6fer","year":"2002","unstructured":"Hungersh\u00f6fer, J., Wierum, J.-M.: On the quality of partitions based on space-filling curves. In: Sloot, P.M.A., Hoekstra, A.G., Tan, C.J.K., Dongarra, J.J. (eds.) ICCS 2002. LNCS, vol. 2331, pp. 36\u201345. Springer, Heidelberg (2002). doi: 10.1007\/3-540-47789-6_4"},{"key":"5_CR16","unstructured":"Intel: Strawman system architecture and evaluation (2004). http:\/\/tinyurl.com\/j6xxg22 . Accessed 10 July 2016"},{"issue":"9","key":"5_CR17","doi-asserted-by":"crossref","first-page":"1017","DOI":"10.1016\/j.jpdc.2004.03.021","volume":"64","author":"D Irony","year":"2004","unstructured":"Irony, D., Toledo, S., Tiskin, A.: Communication lower bounds for distributed-memory matrix multiplication. J. Parallel Distrib. Comput. 64(9), 1017\u20131026 (2004)","journal-title":"J. Parallel Distrib. Comput."},{"key":"5_CR18","doi-asserted-by":"crossref","unstructured":"Jaleel, A., Borch, E., Bhandaru, M., Steely Jr., S.C., Emer, J.: Achieving non-inclusive cache performance with inclusive caches: temporal locality aware (TLA) cache management policies. In: MICRO 2010, MICRO \u201943, Washington, DC, USA. IEEE Computer Society (2010)","DOI":"10.1109\/MICRO.2010.52"},{"key":"5_CR19","unstructured":"Juega, J., G\u2019omez, J., Tenllado, C., Verdoolaege, S., Cohen, A., Catthoor, F.: Evaluation of state-of-the-art polyhedral tools for automatic code generation on GPUs (2012)"},{"key":"5_CR20","doi-asserted-by":"crossref","unstructured":"Leung, A., Vasilache, N., Meister, B., Baskaran, M., Wohlford, D., Bastoul, C., Lethin, R.: A mapping path for multi-GPGPU accelerated computers from a portable high level programming abstraction. In: GPGPU-3, March 2010","DOI":"10.1145\/1735688.1735698"},{"issue":"4","key":"5_CR21","doi-asserted-by":"crossref","first-page":"354","DOI":"10.1007\/BF02165411","volume":"13","author":"V Strassen","year":"1969","unstructured":"Strassen, V.: Gaussian elimination is not optimal. Numer. Math. 13(4), 354\u2013356 (1969)","journal-title":"Numer. Math."},{"key":"5_CR22","doi-asserted-by":"crossref","unstructured":"Verdoolaege, S., Carlos Juega, J., Cohen, A., Ignacio G\u00f3mez, J., Tenllado, C., Catthoor, F.: Polyhedral parallel code generation for CUDA. ACM Trans. Archit. Code Optim. 9(4) (2013)","DOI":"10.1145\/2400682.2400713"},{"key":"5_CR23","doi-asserted-by":"crossref","unstructured":"Whaley, R.C., Dongarra, J.J.: Automatically tuned linear algebra software. In: SuperComputing 1998, San Jose, CA. IEEE Computer Society (1998)","DOI":"10.1109\/SC.1998.10004"},{"key":"5_CR24","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"290","DOI":"10.1007\/11821045_31","volume-title":"Advances in Machine Vision, Image Processing, and Pattern Analysis","author":"J Zhang","year":"2006","unstructured":"Zhang, J., Kamata, S., Ueshige, Y.: A pseudo-Hilbert scan algorithm for arbitrarily-sized rectangle region. In: Zheng, N., Jiang, X., Lan, X. (eds.) IWICPAS 2006. LNCS, vol. 4153, pp. 290\u2013299. Springer, Heidelberg (2006). doi: 10.1007\/11821045_31"}],"container-title":["Lecture Notes in Computer Science","Languages and Compilers for Parallel Computing"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-52709-3_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,9,17]],"date-time":"2019-09-17T17:58:56Z","timestamp":1568743136000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-52709-3_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319527086","9783319527093"],"references-count":24,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-52709-3_5","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2017]]}}}