{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T16:01:29Z","timestamp":1774454489859,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":28,"publisher":"ACM","license":[{"start":{"date-parts":[[2008,2,20]],"date-time":"2008-02-20T00:00:00Z","timestamp":1203465600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2008,2,20]]},"DOI":"10.1145\/1345206.1345220","type":"proceedings-article","created":{"date-parts":[[2008,2,28]],"date-time":"2008-02-28T14:02:49Z","timestamp":1204207369000},"page":"73-82","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":510,"title":["Optimization principles and application performance evaluation of a multithreaded GPU using CUDA"],"prefix":"10.1145","author":[{"given":"Shane","family":"Ryoo","sequence":"first","affiliation":[{"name":"University of Illinois at Urbana-Champaign, Urbana, IL, USA"}]},{"given":"Christopher I.","family":"Rodrigues","sequence":"additional","affiliation":[{"name":"University of Illinois at Urbana-Champaign, Urbana, IL, USA"}]},{"given":"Sara S.","family":"Baghsorkhi","sequence":"additional","affiliation":[{"name":"University of Illinois at Urbana-Champaign, Urbana, IL, USA"}]},{"given":"Sam S.","family":"Stone","sequence":"additional","affiliation":[{"name":"University of Illinois at Urbana-Champaign, Urbana, IL, USA"}]},{"given":"David B.","family":"Kirk","sequence":"additional","affiliation":[{"name":"NVIDIA Corporation, Santa Clara, CA, USA"}]},{"given":"Wen-mei W.","family":"Hwu","sequence":"additional","affiliation":[{"name":"University of Illinois at Urbana-Champaign, Urbana, IL, USA"}]}],"member":"320","published-online":{"date-parts":[[2008,2,20]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"AMD Stream Processor. http:\/\/ati.amd.com\/products\/ streamprocessor\/index.html.  AMD Stream Processor. http:\/\/ati.amd.com\/products\/ streamprocessor\/index.html."},{"key":"e_1_3_2_1_2_1","unstructured":"CUDA benchmark suite. http:\/\/www.crhc.uiuc.edu\/impact\/cudabench.html.  CUDA benchmark suite. http:\/\/www.crhc.uiuc.edu\/impact\/cudabench.html."},{"key":"e_1_3_2_1_3_1","unstructured":"NVIDIA CUDA. http:\/\/developer.nvidia.com\/object\/cuda.html.  NVIDIA CUDA. http:\/\/developer.nvidia.com\/object\/cuda.html."},{"key":"e_1_3_2_1_4_1","volume-title":"High productivity software development for multi-core processors. Technical report","author":"The","year":"2006","unstructured":"The PeakStream platform : High productivity software development for multi-core processors. Technical report , 2006 . The PeakStream platform: High productivity software development for multi-core processors. Technical report, 2006."},{"key":"e_1_3_2_1_5_1","volume-title":"Fall","author":"ECE","year":"2007","unstructured":"ECE 498AL1 : Programming massively parallel processors , Fall 2007 . http:\/\/courses.ece.uiuc.edu\/ece498\/al1\/. ECE 498AL1: Programming massively parallel processors, Fall 2007. http:\/\/courses.ece.uiuc.edu\/ece498\/al1\/."},{"key":"e_1_3_2_1_6_1","volume-title":"Fortran 90 handbook: complete ANSI\/ISO reference","author":"Adams J. C.","year":"1992","unstructured":"J. C. Adams , W. S. Brainerd , J. T. Martin , B. T. Smith , and J. L. Wagener . Fortran 90 handbook: complete ANSI\/ISO reference . Intertext Publications, Inc. ,\/McGraw- Hill, Inc ., 1992 . J. C. Adams, W. S. Brainerd, J. T. Martin, B. T. Smith, and J. L. Wagener. Fortran 90 handbook: complete ANSI\/ISO reference. Intertext Publications, Inc.,\/McGraw-Hill, Inc., 1992."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/29873.29875"},{"key":"e_1_3_2_1_8_1","volume-title":"Algorithms and Theory of Computation Handbook","author":"Atallah M. J.","year":"1998","unstructured":"M. J. Atallah , editor. Algorithms and Theory of Computation Handbook . CRC Press LLC , 1998 . M. J. Atallah, editor. Algorithms and Theory of Computation Handbook. CRC Press LLC, 1998."},{"key":"e_1_3_2_1_9_1","volume-title":"October","author":"Buck I.","year":"2003","unstructured":"I. Buck . Brook Specification v0.2 , October 2003 . I. Buck. Brook Specification v0.2, October 2003."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/989393.989428"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/1058129.1058148"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/1188455.1188549"},{"key":"e_1_3_2_1_13_1","volume-title":"Optimizing compilers for modern architectures: a dependence-based approach","author":"Kennedy K.","year":"2002","unstructured":"K. Kennedy and J. R. Allen . Optimizing compilers for modern architectures: a dependence-based approach . Morgan Kaufmann Publishers Inc ., 2002 . K. Kennedy and J. R. Allen. Optimizing compilers for modern architectures: a dependence-based approach. Morgan Kaufmann Publishers Inc., 2002."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/106972.106981"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/88.219857"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/1201775.882362"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/1188455.1188642"},{"key":"e_1_3_2_1_18_1","volume-title":"Microprocessor Forum","author":"Nickolls J.","year":"2007","unstructured":"J. Nickolls and I. Buck . NVIDIA CUDA software and GPU parallel computing architecture . Microprocessor Forum , May 2007 . J. Nickolls and I. Buck. NVIDIA CUDA software and GPU parallel computing architecture. Microprocessor Forum, May 2007."},{"key":"e_1_3_2_1_19_1","volume-title":"May","author":"Architecture Review Board MP","year":"2005","unstructured":"Open MP Architecture Review Board . OpenMP application program interface , May 2005 . OpenMP Architecture Review Board. OpenMP application program interface, May 2005."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"crossref","unstructured":"J. Owens. Streaming architectures and technology trends. GPU Gems 2 pages 457--470 March 2005.  J. Owens. Streaming architectures and technology trends. GPU Gems 2 pages 457--470 March 2005.","DOI":"10.1145\/1198555.1198766"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1111\/j.1467-8659.2007.01012.x"},{"key":"e_1_3_2_1_22_1","volume-title":"The First Workshop on General Purpose Processing on Graphics Processing Units","author":"Ryoo S.","year":"2007","unstructured":"S. Ryoo , C. I. Rodrigues , S. S. Stone , S. S. Baghsorkhi , S.-Z. Ueng , and W. W. Hwu . Program optimization study on a 128-core GPU . In The First Workshop on General Purpose Processing on Graphics Processing Units , October 2007 . S. Ryoo, C. I. Rodrigues, S. S. Stone, S. S. Baghsorkhi, S.-Z. Ueng, and W. W. Hwu. Program optimization study on a 128-core GPU. In The First Workshop on General Purpose Processing on Graphics Processing Units, October 2007."},{"key":"e_1_3_2_1_23_1","volume-title":"MPI: The Complete Reference","author":"Snir M.","year":"1995","unstructured":"M. Snir , S. W. Otto , D. W. Walker , J. Dongarra , and S. Huss-Lederman . MPI: The Complete Reference . MIT Press , 1995 . M. Snir, S. W. Otto, D. W. Walker, J. Dongarra, and S. Huss-Lederman. MPI: The Complete Reference. MIT Press, 1995."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1002\/jcc.20829"},{"key":"e_1_3_2_1_25_1","volume-title":"The First Workshop on General Purpose Processing on Graphics Processing Units","author":"Stone S. S.","year":"2007","unstructured":"S. S. Stone , H. Yi , W. W. Hwu , J. P. Haldar , B. P. Sutton , and Z.-P. Liang . How GPUs can improve the quality of magnetic resonance imaging . In The First Workshop on General Purpose Processing on Graphics Processing Units , October 2007 . S. S. Stone, H. Yi, W. W. Hwu, J. P. Haldar, B. P. Sutton, and Z.-P. Liang. How GPUs can improve the quality of magnetic resonance imaging. In The First Workshop on General Purpose Processing on Graphics Processing Units, October 2007."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/1168857.1168898"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/1250734.1250753"},{"key":"e_1_3_2_1_28_1","volume-title":"Optimizing Supercompilers for Supercomputers","author":"Wolfe M. J.","year":"1990","unstructured":"M. J. Wolfe . Optimizing Supercompilers for Supercomputers . MIT Press , 1990 . M. J. Wolfe. Optimizing Supercompilers for Supercomputers. MIT Press, 1990."}],"event":{"name":"PPoPP08: ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming","location":"Salt Lake City UT USA","acronym":"PPoPP08","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages","ACM Association for Computing Machinery"]},"container-title":["Proceedings of the 13th ACM SIGPLAN Symposium on Principles and practice of parallel programming"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1345206.1345220","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/1345206.1345220","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T13:56:18Z","timestamp":1750254978000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/1345206.1345220"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2008,2,20]]},"references-count":28,"alternative-id":["10.1145\/1345206.1345220","10.1145\/1345206"],"URL":"https:\/\/doi.org\/10.1145\/1345206.1345220","relation":{},"subject":[],"published":{"date-parts":[[2008,2,20]]},"assertion":[{"value":"2008-02-20","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}