{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T16:15:40Z","timestamp":1743005740705,"version":"3.40.3"},"publisher-location":"Berlin, Heidelberg","reference-count":11,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540854500"},{"type":"electronic","value":"9783540854517"}],"license":[{"start":{"date-parts":[[2008,1,1]],"date-time":"2008-01-01T00:00:00Z","timestamp":1199145600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2008,1,1]],"date-time":"2008-01-01T00:00:00Z","timestamp":1199145600000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2008]]},"DOI":"10.1007\/978-3-540-85451-7_14","type":"book-chapter","created":{"date-parts":[[2008,8,19]],"date-time":"2008-08-19T13:15:29Z","timestamp":1219151729000},"page":"120-129","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["A Performance Model of Dense Matrix Operations on Many-Core Architectures"],"prefix":"10.1007","author":[{"given":"Guoping","family":"Long","sequence":"first","affiliation":[]},{"given":"Dongrui","family":"Fan","sequence":"additional","affiliation":[]},{"given":"Junchao","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Fenglong","family":"Song","sequence":"additional","affiliation":[]},{"given":"Nan","family":"Yuan","sequence":"additional","affiliation":[]},{"given":"Wei","family":"Lin","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"14_CR1","unstructured":"Asanovic, K., Bodik, R., Catanzaro, B.C., Gebis, J.J., Husbands, P., Keutzer, K., Patterson, D.A., Plishker, W.L., Shalf, J., Williams, S.W., Yelick, K.A.: The Landscape of Parallel Computing Research: A View from Berkeley"},{"key":"14_CR2","doi-asserted-by":"crossref","unstructured":"Zhu, W.R., Sreedhar, V.C., Aang Hu, Z., Gao, G.R.: Synchronization State Buffer: Supporting Efficient Fine-Grain Synchronization for Many-Core Architectures. In: Proceedings of the 34th International Symposium on Computer Architecture (ISCA 2007), San Diego, CA, USA, June 9-13 (2007)","DOI":"10.1145\/1250662.1250668"},{"key":"14_CR3","doi-asserted-by":"crossref","unstructured":"Vangal, S., Howard, J., Ruhl, G., Dighe, S., Wilson, H., Tschanz, J., Finan, D., Iyer, P., Singh, A., Jacob, T., Jain, S., Venkataraman, S., Hoskote, Y., Borkar, N.: An 80-Tile 1.28TFLOPS Network-on-Chip in 65nm CMOS. In: Proceedings of IEEE International Solid-State Circuits Conference, February 11-15 (2007)","DOI":"10.1109\/ISSCC.2007.373606"},{"key":"14_CR4","doi-asserted-by":"crossref","unstructured":"Dally, W.J., Labonte, F., Das, A., Hanrahan, P., Ahn, J.H., Gummaraju, J., Erez, M., Jayasena, N., Buck, I., Knight, T.J., Kapasi, U.J.: Merrimac: Supercomputing with Streams. In: Proceedings of the Supercomputer Conference, November 15-21 (2003)","DOI":"10.1145\/1048935.1050187"},{"key":"14_CR5","doi-asserted-by":"crossref","unstructured":"Tan, G., Fan, D., Zhang, J., Russo, A., Gao, G.R.: Experience on Optimizing Irregular Computation for Memory Hierarchy in Manycore Architecture. In: The 13th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, February 20-23 (2008)","DOI":"10.1145\/1345206.1345255"},{"key":"14_CR6","doi-asserted-by":"crossref","unstructured":"Ang Hu, Z., del Cuvillo, J., Zhu, W., Gao, G.R.: Optimization of Dense Matrix Multiplication on IBM Cyclops-64: Challenges and Experiences. In: The 12th International European Conference on Parallel Processing, 29 August - 1 September (2006)","DOI":"10.1007\/11823285_14"},{"key":"14_CR7","unstructured":"Venetis, I.E., Gao, G.R.: Optimizing the LU Benchmark for the Cyclops-64 Architecture. CAPSL Technical Memo 75 (February 2007)"},{"key":"14_CR8","unstructured":"Tan, G.: Locality and Parallelism of Algorithm in Irregular Computation. PH.D. dissertation. Institute of Computing Technology, Chinese Academy of Sciences (6) (2007)"},{"key":"14_CR9","unstructured":"Automatically Tuned Linear Algebra Software (ATLAS), \n                    \n                      http:\/\/math-atlas.sourceforge.net\/"},{"key":"14_CR10","doi-asserted-by":"crossref","unstructured":"Yotov, K., Roeder, T., Pingali, K., Gunnels, J., Gustavson, F.: An Experimental Comparison of Cache-oblivious and Cache-aware Programs. In: Proceedings of the 19th Annual ACM Symposium on Parallelism in Algorithms and Architectures, June 9-11 (2007)","DOI":"10.1145\/1248377.1248394"},{"key":"14_CR11","doi-asserted-by":"crossref","unstructured":"Bilardi, G., Pietracaprina, A., Pucci, G., Schifano, S.F., Tripiccione, R.: The Potential of On-Chip Multiprocessing for QCD Machines. In: Proceedings of the International Conference on High Performance Computing, pp. 386\u2013397 (December 2005)","DOI":"10.1007\/11602569_41"}],"container-title":["Lecture Notes in Computer Science","Euro-Par 2008 \u2013 Parallel Processing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-540-85451-7_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,5,19]],"date-time":"2020-05-19T13:16:27Z","timestamp":1589894187000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-540-85451-7_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2008]]},"ISBN":["9783540854500","9783540854517"],"references-count":11,"URL":"https:\/\/doi.org\/10.1007\/978-3-540-85451-7_14","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2008]]},"assertion":[{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}