{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,18]],"date-time":"2025-11-18T12:16:53Z","timestamp":1763468213963,"version":"3.40.3"},"publisher-location":"Cham","reference-count":23,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319099668"},{"type":"electronic","value":"9783319099675"}],"license":[{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014]]},"DOI":"10.1007\/978-3-319-09967-5_14","type":"book-chapter","created":{"date-parts":[[2014,9,30]],"date-time":"2014-09-30T15:10:04Z","timestamp":1412089804000},"page":"237-251","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Optimizing the LU Factorization for Energy Efficiency on a Many-Core Architecture"],"prefix":"10.1007","author":[{"given":"Elkin","family":"Garcia","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jaime","family":"Arteaga","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Robert","family":"Pavel","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guang R.","family":"Gao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2014,10,1]]},"reference":[{"key":"14_CR1","doi-asserted-by":"crossref","unstructured":"Garcia, E., Orozco, D., Khan, R., Venetis, I., Livingston, K., Gao, G.R.: Dynamic percolation: a case of study on the shortcomings of traditional optimization in many-core architectures. In: Proceedings of 2012 ACM International Conference on Computer Frontiers (CF 2012), Cagliari, Italy, May 2012. ACM (2012)","DOI":"10.1145\/2212908.2212944"},{"key":"14_CR2","doi-asserted-by":"crossref","unstructured":"Garcia, E., Orozco, D., Khan, R., Venetis, I., Livingston, K., Gao, G.: A dynamic schema to increase performance in many-core architectures through percolation operations. In: Proceedings of the 2013 IEEE International Conference on High Performance Computing (HiPC 2013), Bangalore, India, December 2013. IEEE Computer Society (2013)","DOI":"10.1109\/HiPC.2013.6799134"},{"key":"14_CR3","unstructured":"Bergman, K., Borkar, S., Campbell, D., Carlson, W., Dally, W., Denneau, M., Franzon, P., Harrod, W., Hiller, J., Karp, S., Keckler, S., Klein, D., Lucas, R., Richards, M.,\u00a0Scarpelli, A., Scott, S., Snavely, A., Sterling, T., Williams, R.S., Yelick, K.:\u00a0Exascale computing study: technology challenges in achieving exascale systems. DARPA Information Processing Techniques Office (IPTO) sponsored study (2008)"},{"key":"14_CR4","doi-asserted-by":"publisher","first-page":"28","DOI":"10.1109\/MC.2009.341","volume":"42","author":"J Torrellas","year":"2009","unstructured":"Torrellas, J.: Architectures for extreme-scale computing. Computer 42, 28\u201335 (2009)","journal-title":"Computer"},{"key":"14_CR5","first-page":"145","volume-title":"Encyclopedia of Parallel Computing: SpringerReference","author":"M Denneau","year":"2011","unstructured":"Denneau, M.: Cyclops. In: Padua, D. (ed.) Encyclopedia of Parallel Computing: SpringerReference, p. 145. Springer, Heidelberg (2011). www.springerreference.com"},{"key":"14_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"316","DOI":"10.1007\/978-3-642-15291-7_29","volume-title":"Euro-Par 2010 - Parallel Processing","author":"E Garcia","year":"2010","unstructured":"Garcia, E., Venetis, I.E., Khan, R., Gao, G.R.: Optimized dense matrix multiplication on a many-core architecture. In: D\u2019Ambra, P., Guarracino, M., Talia, D. (eds.) Euro-Par 2010, Part II. LNCS, vol. 6272, pp. 316\u2013327. Springer, Heidelberg (2010)"},{"key":"14_CR7","doi-asserted-by":"crossref","unstructured":"Chen, L., Gao, G.R.: Performance analysis of cooley-tukey fft algorithms for a many-core architecture, in Proceedings of the 2010 Spring Simulation Multiconference, SpringSim \u201910, (San Diego, CA, USA), pp. 81:1\u201381:8, Society for Computer Simulation International, 2010","DOI":"10.1145\/1878537.1878622"},{"key":"14_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1007\/978-3-642-19595-2_6","volume-title":"Languages and Compilers for Parallel Computing","author":"D Orozco","year":"2011","unstructured":"Orozco, D., Garcia, E., Gao, G.: Locality optimization of stencil applications using data dependency graphs. In: Cooper, K., Mellor-Crummey, J., Sarkar, V. (eds.) LCPC 2010. LNCS, vol. 6548, pp. 77\u201391. Springer, Heidelberg (2011)"},{"key":"14_CR9","unstructured":"Garcia, E., Orozco, D., Gao, G.: Energy efficient tiling on a many-core architecture. In: Proceedings of 4th Workshop on Programmability Issues for Heterogeneous Multicores (MULTIPROG-2011); 6th International Conference on High-Performance and Embedded Architectures and Compilers (HiPEAC), Heraklion, Greece, January 2011, pp. 53\u201366 (2011)"},{"key":"14_CR10","unstructured":"Chen, O.Y.: A comparison of pivoting strategies for the direct lu factorization. In: Electronic Proceedings of the Eighth Annual International Conference on Technology in Collegiate Mathematics Houston, Texas, 16\u201319 November 1995"},{"key":"14_CR11","doi-asserted-by":"publisher","first-page":"151","DOI":"10.1137\/1037042","volume":"37","author":"JJ Dongarra","year":"1995","unstructured":"Dongarra, J.J., Walker, D.W.: Software libraries for linear algebra computations on high performance computers. SIAM Rev. 37, 151\u2013180 (1995)","journal-title":"SIAM Rev."},{"issue":"9","key":"14_CR12","doi-asserted-by":"publisher","first-page":"803","DOI":"10.1002\/cpe.728","volume":"15","author":"J Dongarra","year":"2003","unstructured":"Dongarra, J., Luszczek, P., Petitet, A.: The linpack benchmark: past, present and future. Concurrency Comput.: Pract. Exper. 15(9), 803\u2013820 (2003)","journal-title":"Concurrency Comput.: Pract. Exper."},{"key":"14_CR13","doi-asserted-by":"publisher","first-page":"24","DOI":"10.1145\/225830.223990","volume":"23","author":"SC Woo","year":"1995","unstructured":"Woo, S.C., Ohara, M., Torrie, E., Singh, J.P., Gupta, A.: The splash-2 programs: characterization and methodological considerations. SIGARCH Comput. Archit. News 23, 24\u201336 (1995)","journal-title":"SIGARCH Comput. Archit. News"},{"key":"14_CR14","doi-asserted-by":"crossref","unstructured":"Venetis, I.E., Gao, G.R.: Mapping the LU decomposition on a many-core architecture: challenges and solutions. In: Proceedings of the 6th ACM Conference on Computing Frontiers (CF \u201909), Ischia, Italy, May 2009, pp. 71\u201380 (2009)","DOI":"10.1145\/1531743.1531756"},{"key":"#cr-split#-14_CR15.1","doi-asserted-by":"crossref","unstructured":"Garcia, E., Orozco, D., Pavel, R., Gao, G.R.: A discussion in favor of dynamic scheduling for regular applications in many-core architectures. In: Proceedings of 2012 Workshop on Multithreaded Architectures and Applications (MTAAP 2012)","DOI":"10.1109\/IPDPSW.2012.200"},{"key":"#cr-split#-14_CR15.2","unstructured":"26th IEEE International Parallel and Distributed Processing Symposium (IPDPS 2012), Shanghai, China, May 2012. IEEE (2012)"},{"key":"14_CR16","unstructured":"del Cuvillo, J., Zhu, W., Hu, Z., Gao, G.R.: FAST: a functionally accurate simulation toolset for the cyclops-64 cellular architecture. In: Workshop on Modeling, Benchmarking, and Simulation (MoBS \u201905), in Conjunction with the 32nd Annual International Symposium on Computer Architecture (ISCA 05), pp. 11\u201320 (2005)"},{"key":"14_CR17","unstructured":"Yao, F., Demers, A., Shenker, S.: A scheduling model for reduced CPU energy. In: Proceedings of the 36th Annual Symposium on Foundations of Computer Science, October 1995, pp. 374\u2013382 (1995)"},{"key":"14_CR18","series-title":"The Kluwer International Series in Engineering and Computer Science","doi-asserted-by":"publisher","first-page":"449","DOI":"10.1007\/978-0-585-29603-6_17","volume-title":"Mobile Computing","author":"M Weiser","year":"1996","unstructured":"Weiser, M., Welch, B., Demers, A., Shenker, S.: Scheduling for reduced cpu energy. In: Imielinski, T., Korth, H.F. (eds.) Mobile Computing. The Kluwer International Series in Engineering and Computer Science, vol. 353, pp. 449\u2013471. Springer, Boston (1996)"},{"key":"14_CR19","unstructured":"Steinke, S., Knauer, M., Wehmeyer, L., Marwedel, P.: An accurate and fine grain instruction-level energy model supporting software optimizations. In: Proceedings of PATMOS, Citeseer (2001)"},{"key":"14_CR20","doi-asserted-by":"crossref","unstructured":"Lee, S., Ermedahl, A., Min, S.L.: An accurate instruction-level energy consumption model for embedded risc processors. In: LCTES \u201901: Proceedings of the ACM SIGPLAN Workshop on Languages, Compilers and Tools for Embedded Systems, New York, NY, USA, pp. 1\u201310. ACM (2001)","DOI":"10.1145\/384196.384201"},{"key":"14_CR21","doi-asserted-by":"publisher","first-page":"262","DOI":"10.1109\/TVLSI.2007.891101","volume":"15","author":"A Andrei","year":"2007","unstructured":"Andrei, A., Eles, P., Peng, Z., Schmitz, M., Hashimi, B.: Energy optimization of multiprocessor systems on chip by voltage selection. IEEE Trans. Very Large Scale Integr. (VLSI) Syst. 15, 262\u2013275 (2007)","journal-title":"IEEE Trans. Very Large Scale Integr. (VLSI) Syst."},{"key":"14_CR22","doi-asserted-by":"crossref","unstructured":"Donfack, S., Grigori, L., Gropp, W., Kale, V.: Hybrid static\/dynamic scheduling for already optimized dense matrix factorization. In: 2012 IEEE 26th International Parallel Distributed Processing Symposium (IPDPS), pp. 496\u2013507 (2012)","DOI":"10.1109\/IPDPS.2012.53"}],"container-title":["Lecture Notes in Computer Science","Languages and Compilers for Parallel Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-09967-5_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,20]],"date-time":"2023-02-20T11:01:19Z","timestamp":1676890879000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-09967-5_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014]]},"ISBN":["9783319099668","9783319099675"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-09967-5_14","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2014]]},"assertion":[{"value":"1 October 2014","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}