{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,18]],"date-time":"2025-11-18T12:09:52Z","timestamp":1763467792194,"version":"3.33.0"},"publisher-location":"Berlin, Heidelberg","reference-count":19,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540713500"},{"type":"electronic","value":"9783540713517"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"DOI":"10.1007\/978-3-540-71351-7_24","type":"book-chapter","created":{"date-parts":[[2007,5,29]],"date-time":"2007-05-29T18:48:52Z","timestamp":1180464532000},"page":"305-318","source":"Crossref","is-referenced-by-count":19,"title":["Parallel Processing of Matrix Multiplication in a CPU and GPU Heterogeneous Environment"],"prefix":"10.1007","author":[{"given":"Satoshi","family":"Ohshima","sequence":"first","affiliation":[]},{"given":"Kenji","family":"Kise","sequence":"additional","affiliation":[]},{"given":"Takahiro","family":"Katagiri","sequence":"additional","affiliation":[]},{"given":"Toshitsugu","family":"Yuba","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"24_CR1","unstructured":"gpgpu.org: General-Purpose computation on GPUs(GPGPU), http:\/\/gpgpu.org\/"},{"key":"24_CR2","first-page":"306","volume-title":"Proceedings of the 35th annual ACM\/IEEE International Symposium on Microarchitecture","author":"C.J. Thompson","year":"2002","unstructured":"Thompson, C.J., Hahn, S., Oskin, M.: Using Modern Graphics Architectures for General-Purpose Computing: A Framework and Analysis. In: Proceedings of the 35th annual ACM\/IEEE International Symposium on Microarchitecture, pp. 306\u2013317. IEEE Computer Society Press, Los Alamitos (2002)"},{"key":"24_CR3","unstructured":"Owens, J.D., et al.: A Survey of General-Purpose Computation on Graphics Hardware. In: Eurographics 2005, State of the Art Reports, Dublin, Ireland, pp. 21\u201351 (2005)"},{"key":"24_CR4","doi-asserted-by":"publisher","first-page":"352","DOI":"10.1145\/98267.98290","volume":"16","author":"N.J. Higham","year":"1990","unstructured":"Higham, N.J.: Exploiting Fast Matrix Multiplication Within the Level 3 BLAS. ACM Transactions on Mathematical Software\u00a016, 352\u2013368 (1990)","journal-title":"ACM Transactions on Mathematical Software"},{"issue":"1\u20132","key":"24_CR5","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1016\/S0167-8191(00)00087-9","volume":"27","author":"R.C. Whaley","year":"2001","unstructured":"Whaley, R.C., Petitet, A., Dongarra, J.J.: Automated Empirical Optimization of Software and the ATLAS Project. Parallel Computing\u00a027(1\u20132), 3\u201335 (2001)","journal-title":"Parallel Computing"},{"key":"24_CR6","doi-asserted-by":"crossref","unstructured":"John Montrym, H.M.: THE GEFORCE 6800. IEEE MICRO 2005\u00a025(2) (2005)","DOI":"10.1109\/MM.2005.37"},{"key":"24_CR7","volume-title":"GPU Gems: Programming Techniques, Tips and Tricks for Real-Time Graphics","author":"R. Fernando","year":"2004","unstructured":"Fernando, R.: GPU Gems: Programming Techniques, Tips and Tricks for Real-Time Graphics. Addison-Wesley, Reading (2004)"},{"key":"24_CR8","unstructured":"Shinomoto, Y., et al.: Consideration for Speculative Rendering in PVR. In: IPSJ SIG Technical Reports, 2005-ARC-164, pp. 145\u2013150 (2005)"},{"key":"24_CR9","volume-title":"ACM Workshop on General-Purpose Computing on Graphics Processors","author":"T. Amada","year":"2004","unstructured":"Amada, T., et al.: Partivle-Based Fluid Simulation on GPU. In: ACM Workshop on General-Purpose Computing on Graphics Processors, ACM Press, New York (2004)"},{"key":"24_CR10","unstructured":"Morav\u00e1nszky, A.: Dense Matrix Algebra on the GPU, ShaderX2 (2003)"},{"key":"24_CR11","doi-asserted-by":"publisher","first-page":"908","DOI":"10.1145\/1201775.882363","volume-title":"Proceedings of ACM SIGGRAPH 2003","author":"J. Kr\u00fcger","year":"2003","unstructured":"Kr\u00fcger, J., Westermann, R.: Linear Algebra Operators for GPU Implementation of Numerical Algorithms. In: Proceedings of ACM SIGGRAPH 2003, pp. 908\u2013916. ACM Press, New York (2003)"},{"key":"24_CR12","unstructured":"Moreland, K., Angel, E.: The FFT on a GPU. In: Proc. SIGGRAPH \/ EUROGRAPHICS Workshop Graphics Hardware, pp. 112\u2013119 (2003)"},{"key":"24_CR13","unstructured":"Hillesland, K., Lastra, A.: GPU floating-point paranoia. In: Proceedings of GP2 (2004)"},{"key":"24_CR14","volume-title":"Proceedings of the 2001 ACM\/IEEE conference on Supercomputing","author":"E.S. Larsen","year":"2001","unstructured":"Larsen, E.S., McAllister, D.: Fast matrix multiplies using graphics hardware. In: Proceedings of the 2001 ACM\/IEEE conference on Supercomputing, IEEE Computer Society Press, Los Alamitos (2001)"},{"key":"24_CR15","doi-asserted-by":"crossref","unstructured":"Fatahalian, K., Sugerman, J., Hanrahan, P.: Understanding the Efficiency of GPU Algorithms for Matrix-Matrix Multiplication. In: Graphics Hardware 2004 (2004)","DOI":"10.1145\/1058129.1058148"},{"key":"24_CR16","unstructured":"Hall, J.D., Carr, N.A., Hart, J.C.: Cache and Bandwidth Aware Matrix Multiplication on the GPU. Technical report, University of Illinois Dept. of Computer Science (2003)"},{"key":"24_CR17","doi-asserted-by":"crossref","unstructured":"Jiang, C., Snir, M.: Automatic Tuning Matrix Multiplication Performance on Graphics Hardware. In: Proceedings of the 14th International Conference on Parallel Architectures and Compilation Techniques (PACT\u201905), pp. 185\u2013196 (2005)","DOI":"10.1109\/PACT.2005.10"},{"key":"24_CR18","doi-asserted-by":"publisher","first-page":"133","DOI":"10.1145\/264029.264030","volume":"23","author":"L.S. Blackford","year":"1997","unstructured":"Blackford, L.S., et al.: Practical experience in the numerical dangers of heterogeneous computing. ACM Transactions on Mathematical Software (TOMS)\u00a023, 133\u2013147 (1997)","journal-title":"ACM Transactions on Mathematical Software (TOMS)"},{"key":"24_CR19","unstructured":"Microsoft: DirectX Developer Center, http:\/\/msdn.microsoft.com\/directx\/"}],"container-title":["Lecture Notes in Computer Science","High Performance Computing for Computational Science - VECPAR 2006"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-540-71351-7_24.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,16]],"date-time":"2025-01-16T18:59:43Z","timestamp":1737053983000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-540-71351-7_24"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[null]]},"ISBN":["9783540713500","9783540713517"],"references-count":19,"URL":"https:\/\/doi.org\/10.1007\/978-3-540-71351-7_24","relation":{},"subject":[]}}