{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,6]],"date-time":"2026-01-06T13:52:03Z","timestamp":1767707523824,"version":"3.45.0"},"publisher-location":"New York, NY, USA","reference-count":67,"publisher":"ACM","license":[{"start":{"date-parts":[[2017,9,11]],"date-time":"2017-09-11T00:00:00Z","timestamp":1505088000000},"content-version":"vor","delay-in-days":365,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CNS 1117280"],"award-info":[{"award-number":["CNS 1117280"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2016,9,11]]},"DOI":"10.1145\/2967938.2967964","type":"proceedings-article","created":{"date-parts":[[2016,8,31]],"date-time":"2016-08-31T08:32:08Z","timestamp":1472632328000},"page":"373-386","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":16,"title":["Rinnegan"],"prefix":"10.1145","author":[{"given":"Sankaralingam","family":"Panneerselvam","sequence":"first","affiliation":[{"name":"University of Wisconsin, Madison, Madison, WI, USA"}]},{"given":"Michael","family":"Swift","sequence":"additional","affiliation":[{"name":"University of Wisconsin, Madison, Madison, WI, USA"}]}],"member":"320","published-online":{"date-parts":[[2016,9,11]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"\"Advanced Encryption Standard (AES) \" http:\/\/www.csrc.nist.gov\/publications\/fips\/fips197\/fips-197.pdf 2001."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/121132.121151"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2005.36"},{"key":"e_1_3_2_1_4_1","unstructured":"\"AMD A-Series Desktop APUs \" http:\/\/www.amd.com\/us\/products\/desktop\/processors\/a-series\/Pages\/nextgenapu.aspx."},{"key":"e_1_3_2_1_5_1","unstructured":"ARM Limited \"big.LITTLE Technology: The Future of Mobile \" www.arm.com\/files\/pdf\/big_LITTLE_Technology_the_Futue_of_Mobile.pdf."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-03869-3_80"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/1735688.1735706"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/1629575.1629579"},{"key":"e_1_3_2_1_9_1","volume-title":"5th Workshop on Modeling, Benchmarking and Simulation","author":"Bienia C.","year":"2009","unstructured":"C. Bienia and K. Li, \"PARSEC 2.0: A New Benchmark Suite for Chip-Multiprocessors,\" in Proc. 5th Workshop on Modeling, Benchmarking and Simulation, June 2009."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/209936.209958"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.14778\/2536274.2536325"},{"key":"e_1_3_2_1_12_1","unstructured":"\"C++ AMP : Language and Programming Model \" http:\/\/download.microsoft.com\/download\/4\/0\/E\/40EA02D8-23A7-4BD2-AD3A-0BFFFB640F28\/CppAMPLanguageAndProgrammingModel.pdf."},{"key":"e_1_3_2_1_13_1","unstructured":"\"CFS Scheduler \" https:\/\/www.kernel.org\/doc\/Documentation\/scheduler\/sched-design-CFS.txt."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/2872362.2872368"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/2463209.2488827"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/1383422.1383447"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1142\/S0129626411000151"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/224056.224076"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1147\/JRD.2009.2036980"},{"key":"e_1_3_2_1_21_1","unstructured":"A. Frumusanu \"The Samsung Exynos 7420 Deep Dive - Inside A Modern 14nm SoC \" http:\/\/www.anandtech.com\/show\/9330\/exynos-7420-deep-dive\/2."},{"key":"e_1_3_2_1_22_1","unstructured":"\"Grand Central Dispatch \" http:\/\/developer.apple.com\/library\/ios\/#documentation\/Performance\/Reference\/GCD_libdispatch_Ref\/Reference\/reference.html."},{"key":"e_1_3_2_1_23_1","volume-title":"COD: Database \/ Operating System Co-Design,\" in Conference on Innovative Data Systems Research (CIDR)","author":"Giceva J.","year":"2013","unstructured":"J. Giceva, T.-i. Salomie, A. Schupbach, G. Alonso, and T. Roscoe, \"COD: Database \/ Operating System Co-Design,\" in Conference on Innovative Data Systems Research (CIDR), 2013."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.5555\/1987237.1987259"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.5555\/2002181.2002184"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","unstructured":"M. Heimel M. Saecker H. Pirk S. Manegold and V. Markl \"Hardware-oblivious Parallelism for In-memory Column-stores \" Proc. VLDB Endow. 10.14778\/2536360.2536370","DOI":"10.14778\/2536360.2536370"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2008.209"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.5555\/1972457.1972488"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/1950365.1950390"},{"key":"e_1_3_2_1_30_1","unstructured":"\"HSA Intermediate Language \" https:\/\/hsafoundation.app.box.com\/s\/m6mrsjv8b7r50kqeyyal May 2013."},{"key":"e_1_3_2_1_31_1","volume-title":"A Software Perspective,\" http:\/\/www.intel.com\/cd\/ids\/developer\/asmo-na\/eng\/downloads\/54118.htm","author":"Intel Corporation","year":"2005","unstructured":"Intel Corporation, \"Thermal Protection And Monitoring Features: A Software Perspective,\" http:\/\/www.intel.com\/cd\/ids\/developer\/asmo-na\/eng\/downloads\/54118.htm, 2005."},{"key":"e_1_3_2_1_32_1","unstructured":"\"Intel Sandy Bridge \" http:\/\/software.intel.com\/en-us\/blogs\/2011\/01\/13\/a-look-at-sandy-bridge-integrating-graphics-into-the-cpu."},{"key":"e_1_3_2_1_33_1","unstructured":"B. Jeff \"big.LITTLE Technology Moves Towards Fully Heterogeneous Global Task Scheduling \" http:\/\/www.arm.com\/files\/pdf\/big_LITTLE_technology_moves_towards_fully_heterogeneous_Global_Task_Scheduling.pdf Nov. 2013."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2750392"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/75246.75252"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/2464996.2465007"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/1346281.1346318"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/1669112.1669121"},{"key":"e_1_3_2_1_39_1","volume-title":"Apr.","author":"Qualcomm","year":"2014","unstructured":"\"Qualcomm MARE: Enabling Applications for Heterogeneous Mobile Devices,\" https:\/\/developer.qualcomm.com\/downloads\/whitepaper-qualcomm-mare-enabling-applications-heterogeneous-mobile-devices, Apr. 2014."},{"key":"e_1_3_2_1_40_1","unstructured":"E. Marth and G. Marcus \"Parallelization of the x264 encoder using OpenCL \" http:\/\/li5.ziti.uni-heidelberg.de\/x264gpu\/."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/2541940.2541963"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","unstructured":"N. Mishra H. Zhang J. D. Lafferty and H. Hoffmann \"A Probabilistic Graphical Model-based Approach for Minimizing Energy Under Performance Constraints \" in Proceedings of the Twentieth International Conference on Architectural Support for Programming Languages and Operating Systems ser. ASPLOS '15 2015 pp. 267--281. 10.1145\/2694344.2694373","DOI":"10.1145\/2694344.2694373"},{"key":"e_1_3_2_1_43_1","unstructured":"T. P. Morgan \"Oracle Cranks Up The Cores To 32 With Sparc M7 Chip \" http:\/\/www.enterprisetech.com\/2014\/08\/13\/oracle-cranks-cores-32-sparc-m7-chip\/ Aug. 2014 enterpriseTech."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/1629575.1629597"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/268998.266708"},{"key":"e_1_3_2_1_46_1","unstructured":"NVidia Inc. \"CUDA Toolkit 4.1 \" http:\/\/www.developer.nvidia.com\/cuda-toolkit-41 2011."},{"key":"e_1_3_2_1_47_1","unstructured":"\"NVIDIA OpenCL SDK \" http:\/\/developer.download.nvidia.com\/compute\/cuda\/3_0\/sdk\/website\/OpenCL\/website\/samples.html."},{"key":"e_1_3_2_1_48_1","unstructured":"\"The OpenACC Application Program Interface \" http:\/\/www.openacc-standard.org\/."},{"key":"e_1_3_2_1_49_1","unstructured":"\"OpenCL - The open standard for parallel programming of heterogeneous systems \" http:\/\/download.microsoft.com\/download\/4\/0\/E\/40EA02D8--23A7-4BD2-AD3A-0BFFFB640F28\/CppAMPLanguageAndProgrammingModel.pdf."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/1806596.1806639"},{"key":"e_1_3_2_1_51_1","unstructured":"\"Parallel Implementation of bzip2 \" http:\/\/compression.ca\/pbzip2\/."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.5555\/1863086.1863096"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2013.53"},{"key":"e_1_3_2_1_54_1","first-page":"130","volume":"51","author":"Planas J.","year":"2015","unstructured":"J. Planas, R. M. Badia, E. Ayguade, and J. Labarta, \"AMA: Asynchronous Management of Accelerators for Task-based Programming Models,\" Procedia Computer Science, vol. 51, pp. 130--139, 2015.","journal-title":"\"AMA: Asynchronous Management of Accelerators for Task-based Programming Models,\" Procedia Computer Science"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/2832105.2832109"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/2043556.2043579"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/1755913.1755929"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/2248418.2248428"},{"issue":"99","key":"e_1_3_2_1_59_1","first-page":"1","author":"Altaf M. Shoaib Bin","year":"2014","unstructured":"M. Shoaib Bin Altaf and D. Wood, \"LogCA: A Performance Model for Hardware Accelerators,\" Computer Architecture Letters, vol. PP, no. 99, pp. 1--1, 2014.","journal-title":"\"LogCA: A Performance Model for Hardware Accelerators,\" Computer Architecture Letters"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/2451116.2451169"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1145\/2594291.2594292"},{"key":"e_1_3_2_1_62_1","unstructured":"\"StarPU Task Scheduling Policy \" http:\/\/starpu.gforge.inria.fr\/doc\/html\/Scheduling.html."},{"key":"e_1_3_2_1_63_1","volume-title":"Parboil: A Revised Benchmark Suite for Scientific and Commercial Throughput Computing,\" Center for Reliable and High-Performance Computing","author":"Stratton J.","year":"2012","unstructured":"J. Stratton, C. Rodrigues, I. Sung, N. Obeid, L. Chang, N. Anssari, G. Liu, and W. Hwu, \"Parboil: A Revised Benchmark Suite for Scientific and Commercial Throughput Computing,\" Center for Reliable and High-Performance Computing, 2012."},{"key":"e_1_3_2_1_64_1","unstructured":"N. Sun and C.-C. Lin \"Using the Cryptographic Accelerators in the UltraSparc T1 and T2 Processors \" http:\/\/www.oracle.com\/technetwork\/server-storage\/archive\/a11-014-crypto-accelerators-439765.pdf Nov. 2007."},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1145\/2228360.2228567"},{"key":"e_1_3_2_1_66_1","unstructured":"\"Truecrack \" https:\/\/code.google.com\/p\/truecrack\/."},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.5555\/851041.856928"}],"event":{"name":"PACT '16: International Conference on Parallel Architectures and Compilation","sponsor":["IFIP WG 10.3 IFIP WG 10.3","IEEE TCCA IEEE Computer Society Technical Committee on Computer Architecture","SIGARCH ACM Special Interest Group on Computer Architecture","IEEE CS TCPP IEEE Computer Society Technical Committee on Parallel Processing"],"location":"Haifa Israel","acronym":"PACT '16"},"container-title":["Proceedings of the 2016 International Conference on Parallel Architectures and Compilation"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2967938.2967964","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2967938.2967964","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2967938.2967964","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,18]],"date-time":"2025-11-18T09:27:20Z","timestamp":1763458040000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2967938.2967964"}},"subtitle":["Efficient Resource Use in Heterogeneous Architectures"],"short-title":[],"issued":{"date-parts":[[2016,9,11]]},"references-count":67,"alternative-id":["10.1145\/2967938.2967964","10.1145\/2967938"],"URL":"https:\/\/doi.org\/10.1145\/2967938.2967964","relation":{},"subject":[],"published":{"date-parts":[[2016,9,11]]},"assertion":[{"value":"2016-09-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}