{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,29]],"date-time":"2025-09-29T07:46:49Z","timestamp":1759132009271,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":23,"publisher":"ACM","license":[{"start":{"date-parts":[[2009,5,18]],"date-time":"2009-05-18T00:00:00Z","timestamp":1242604800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2009,5,18]]},"DOI":"10.1145\/1531743.1531756","type":"proceedings-article","created":{"date-parts":[[2009,5,19]],"date-time":"2009-05-19T16:47:44Z","timestamp":1242751664000},"page":"71-80","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":17,"title":["Mapping the LU decomposition on a many-core architecture"],"prefix":"10.1145","author":[{"given":"Ioannis E.","family":"Venetis","sequence":"first","affiliation":[{"name":"University of Patras, Patras, Greece"}]},{"given":"Guang R.","family":"Gao","sequence":"additional","affiliation":[{"name":"University of Delaware, Newark, DE, USA"}]}],"member":"320","published-online":{"date-parts":[[2009,5,18]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"crossref","DOI":"10.1137\/1.9780898719604","volume-title":"LAPACK Users' Guide","author":"Anderson E.","year":"1999","unstructured":"E. Anderson , Z. Bai , C. Bischof , S. Blackford , J. Demmel , J. Dongarra , J. D. Croz , A. Greenbaum , S. Hammarling , A. McKenney , and D. Sorensen . LAPACK Users' Guide . SIAM , 3 rd edition, 1999 . E. Anderson, Z. Bai, C. Bischof, S. Blackford, J. Demmel, J. Dongarra, J. D. Croz, A. Greenbaum, S. Hammarling, A. McKenney, and D. Sorensen. LAPACK Users' Guide. SIAM, 3rd edition, 1999.","edition":"3"},{"key":"e_1_3_2_1_3_1","volume-title":"A Class of Parallel Tiled Linear Algebra Algorithms for Multicore Architectures. LAPACK Working Note","author":"Buttari A.","year":"2007","unstructured":"A. Buttari , J. Langou , J. Kurzak , and J. Dongarra . A Class of Parallel Tiled Linear Algebra Algorithms for Multicore Architectures. LAPACK Working Note 194, Nov. 2007 . A. Buttari, J. Langou, J. Kurzak, and J. Dongarra. A Class of Parallel Tiled Linear Algebra Algorithms for Multicore Architectures. LAPACK Working Note 194, Nov. 2007."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/93542.93553"},{"key":"e_1_3_2_1_5_1","unstructured":"T. Chen R. Raghavan J. Dale and E. Iwata. Cell Broadband Engine Architecture and its First Implementation: A Performance View. http:\/\/www-128.ibm.com\/developerworks\/power\/library\/pa-cellperf.  T. Chen R. Raghavan J. Dale and E. Iwata. Cell Broadband Engine Architecture and its First Implementation: A Performance View. http:\/\/www-128.ibm.com\/developerworks\/power\/library\/pa-cellperf."},{"key":"e_1_3_2_1_6_1","unstructured":"Clearspeed White Paper: CSX Processor Architecture. http:\/\/www.clearspeed.com\/newsevents\/presskit.  Clearspeed White Paper: CSX Processor Architecture. http:\/\/www.clearspeed.com\/newsevents\/presskit."},{"key":"e_1_3_2_1_7_1","volume-title":"Proceedings of the 2005 Workshop on Modeling, Benchmarking, and Simulation (MoBS 2005","author":"del Cuvillo J.","year":"2005","unstructured":"J. del Cuvillo , W. Zhu , Z. Hu , and G. R. Gao . FAST: A Functionally Accurate Simulation Toolset for the Cyclops-64 Cellular Architecture . In Proceedings of the 2005 Workshop on Modeling, Benchmarking, and Simulation (MoBS 2005 ), Madison, Wisconsin , June 2005 . J. del Cuvillo, W. Zhu, Z. Hu, and G. R. Gao. FAST: A Functionally Accurate Simulation Toolset for the Cyclops-64 Cellular Architecture. In Proceedings of the 2005 Workshop on Modeling, Benchmarking, and Simulation (MoBS 2005), Madison, Wisconsin, June 2005."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2005.434"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/77626.79170"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1137\/1037042"},{"volume-title":"The Push of Network Processing to the Top of the Pyramid. Keynote at the Symposium on Architectures for Networking and Communication Systems","author":"Eatherton W.","key":"e_1_3_2_1_11_1","unstructured":"W. Eatherton . The Push of Network Processing to the Top of the Pyramid. Keynote at the Symposium on Architectures for Networking and Communication Systems , Princeton, NJ . W. Eatherton. The Push of Network Processing to the Top of the Pyramid. Keynote at the Symposium on Architectures for Networking and Communication Systems, Princeton, NJ."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/55364.55388"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1147\/rd.416.0737"},{"key":"e_1_3_2_1_14_1","volume-title":"http:\/\/www.netlib.org\/benchmark\/hpl","author":"Portable HPL - A","year":"2004","unstructured":"HPL - A Portable Implementation of the High-Performance Linpack Benchmark for Distributed-Memory Computers . http:\/\/www.netlib.org\/benchmark\/hpl , 2004 . HPL - A Portable Implementation of the High-Performance Linpack Benchmark for Distributed-Memory Computers. http:\/\/www.netlib.org\/benchmark\/hpl, 2004."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1007\/11823285_14"},{"key":"e_1_3_2_1_16_1","volume-title":"The OpenMP Implementation of NAS Parallel Benchmarks and its Performance. Technical report nas-99-011","author":"Jin H.","year":"1999","unstructured":"H. Jin , M. Frumkin , and J. Yan . The OpenMP Implementation of NAS Parallel Benchmarks and its Performance. Technical report nas-99-011 , NASA Ames Research Center , 1999 . H. Jin, M. Frumkin, and J. Yan. The OpenMP Implementation of NAS Parallel Benchmarks and its Performance. Technical report nas-99-011, NASA Ames Research Center, 1999."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/233561.233564"},{"key":"e_1_3_2_1_18_1","unstructured":"Message Passing Interface Forum. MPI-2:Extensions to the Message-Passing Interface 2003.  Message Passing Interface Forum. MPI-2:Extensions to the Message-Passing Interface 2003."},{"key":"e_1_3_2_1_19_1","volume-title":"FLAME Working Note","author":"Quintana-Orti G.","year":"2007","unstructured":"G. Quintana-Orti , E. S. Quintana-Orti , E. Chan , R. A. van de Geijn, and F. G. V. Zee. Design and Scheduling of an Algorithm-by-Blocks for the LU Factorization on Multithreaded Architectures . FLAME Working Note 26, Sept. 2007 . G. Quintana-Orti, E. S. Quintana-Orti, E. Chan, R. A. van de Geijn, and F. G. V. Zee. Design and Scheduling of an Algorithm-by-Blocks for the LU Factorization on Multithreaded Architectures. FLAME Working Note 26, Sept. 2007."},{"key":"e_1_3_2_1_20_1","unstructured":"The Top500 List. http:\/\/www.top500.org.  The Top500 List. http:\/\/www.top500.org."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2007.373606"},{"key":"e_1_3_2_1_22_1","volume-title":"Optimizing the LU Benchmark for the Cyclops-64 Architecture. Technical Memo 75, Computer Architecture and Parallel Systems Laboratory","author":"Venetis I. E.","year":"2007","unstructured":"I. E. Venetis and G. R. Gao . Optimizing the LU Benchmark for the Cyclops-64 Architecture. Technical Memo 75, Computer Architecture and Parallel Systems Laboratory , University of Delaware , Feb. 2007 . http:\/\/www.capsl.udel.edu\/publications.shtml. I. E. Venetis and G. R. Gao. Optimizing the LU Benchmark for the Cyclops-64 Architecture. Technical Memo 75, Computer Architecture and Parallel Systems Laboratory, University of Delaware, Feb. 2007. http:\/\/www.capsl.udel.edu\/publications.shtml."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/223982.223990"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/1250662.1250668"}],"event":{"name":"CF '09: Computing Frontiers Conference","sponsor":["ACM Association for Computing Machinery","SIGMICRO ACM Special Interest Group on Microarchitectural Research and Processing"],"location":"Ischia Italy","acronym":"CF '09"},"container-title":["Proceedings of the 6th ACM conference on Computing frontiers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1531743.1531756","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/1531743.1531756","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T13:29:27Z","timestamp":1750253367000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1531743.1531756"}},"subtitle":["challenges and solutions"],"short-title":[],"issued":{"date-parts":[[2009,5,18]]},"references-count":23,"alternative-id":["10.1145\/1531743.1531756","10.1145\/1531743"],"URL":"https:\/\/doi.org\/10.1145\/1531743.1531756","relation":{},"subject":[],"published":{"date-parts":[[2009,5,18]]},"assertion":[{"value":"2009-05-18","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}