{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T04:23:18Z","timestamp":1750306998895,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":27,"publisher":"ACM","license":[{"start":{"date-parts":[[2012,6,25]],"date-time":"2012-06-25T00:00:00Z","timestamp":1340582400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2012,6,25]]},"DOI":"10.1145\/2304576.2304583","type":"proceedings-article","created":{"date-parts":[[2012,6,27]],"date-time":"2012-06-27T13:31:21Z","timestamp":1340803881000},"page":"25-36","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["One stone two birds"],"prefix":"10.1145","author":[{"given":"Ziyu","family":"Guo","sequence":"first","affiliation":[{"name":"Qualcomm CDMA Technologies, San Diego, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bo","family":"Wu","sequence":"additional","affiliation":[{"name":"College of William and Mary, Williamsburg, VA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xipeng","family":"Shen","sequence":"additional","affiliation":[{"name":"College of William and Mary, Williamsburg, VA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2012,6,25]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"NVIDIA CUDA. http:\/\/www.nvidia.com\/cuda.  NVIDIA CUDA. http:\/\/www.nvidia.com\/cuda."},{"key":"e_1_3_2_1_2_1","unstructured":"OpenCL. http:\/\/www.khronos.org\/opencl\/.  OpenCL. http:\/\/www.khronos.org\/opencl\/."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-02303-3_13"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/1375527.1375562"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/1531743.1531766"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"e_1_3_2_1_7_1","volume-title":"Morgan Kaufmann","author":"Cooper K.","year":"2003","unstructured":"K. Cooper and L. Torczon . Engineering a Compiler . Morgan Kaufmann , 2003 . K. Cooper and L. Torczon. Engineering a Compiler. Morgan Kaufmann, 2003."},{"key":"e_1_3_2_1_8_1","volume-title":"Translating gpu binaries to tiered simd architectures with ocelot","author":"Diamos G.","year":"2009","unstructured":"G. Diamos , A. Kerr , and M. Kesavan . Translating gpu binaries to tiered simd architectures with ocelot . 2009 . G. Diamos, A. Kerr, and M. Kesavan. Translating gpu binaries to tiered simd architectures with ocelot. 2009."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2007.12"},{"key":"e_1_3_2_1_10_1","volume-title":"Proceedings of the International Workshop on Languages and Compilers for Parallel Computing","author":"Guo Z.","year":"2011","unstructured":"Z. Guo and X. Shen . Fine-grained treatment to synchronizations in gpu-to-cpu translation . In Proceedings of the International Workshop on Languages and Compilers for Parallel Computing , 2011 . Z. Guo and X. Shen. Fine-grained treatment to synchronizations in gpu-to-cpu translation. In Proceedings of the International Workshop on Languages and Compilers for Parallel Computing, 2011."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2011.62"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/1950365.1950409"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/1504176.1504194"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2009.5160988"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/1669112.1669121"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/1815961.1815992"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/1810085.1810106"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/1345206.1345220"},{"key":"e_1_3_2_1_19_1","volume-title":"Proceedings of International Parallel and Distribute Processing Symposium (IPDPS)","author":"Steuwer M.","year":"2011","unstructured":"M. Steuwer , P. Kegel , and S. Gorlatch . Skelcl -- a library for portable high-level programming on multi-gpu systems . In Proceedings of International Parallel and Distribute Processing Symposium (IPDPS) , 2011 . M. Steuwer, P. Kegel, and S. Gorlatch. Skelcl -- a library for portable high-level programming on multi-gpu systems. In Proceedings of International Parallel and Distribute Processing Symposium (IPDPS), 2011."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/1772954.1772971"},{"key":"e_1_3_2_1_21_1","volume-title":"Mcuda: An efficient implementation of cuda kernels for multi-core cpus","author":"Stratton J.","year":"2008","unstructured":"J. Stratton , S. Stone , and W. Hwu . Mcuda: An efficient implementation of cuda kernels for multi-core cpus . 2008 . J. Stratton, S. Stone, and W. Hwu. Mcuda: An efficient implementation of cuda kernels for multi-core cpus. 2008."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/1654059.1654082"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.5555\/1541939"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2011.56"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/1806596.1806606"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/1950365.1950408"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/1810085.1810104"}],"event":{"name":"ICS'12: International Conference on Supercomputing","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture"],"location":"San Servolo Island, Venice Italy","acronym":"ICS'12"},"container-title":["Proceedings of the 26th ACM international conference on Supercomputing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2304576.2304583","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2304576.2304583","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T08:48:47Z","timestamp":1750236527000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2304576.2304583"}},"subtitle":["synchronization relaxation and redundancy removal in GPU-CPU translation"],"short-title":[],"issued":{"date-parts":[[2012,6,25]]},"references-count":27,"alternative-id":["10.1145\/2304576.2304583","10.1145\/2304576"],"URL":"https:\/\/doi.org\/10.1145\/2304576.2304583","relation":{},"subject":[],"published":{"date-parts":[[2012,6,25]]},"assertion":[{"value":"2012-06-25","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}