{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,5]],"date-time":"2026-03-05T16:04:24Z","timestamp":1772726664081,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":63,"publisher":"ACM","license":[{"start":{"date-parts":[[2017,10,14]],"date-time":"2017-10-14T00:00:00Z","timestamp":1507939200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Ministry of Economy and Competitiveness of Spain","award":["TIN2012-34557, TIN2015-65316-P, BES-2013-063925"],"award-info":[{"award-number":["TIN2012-34557, TIN2015-65316-P, BES-2013-063925"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2017,10,14]]},"DOI":"10.1145\/3123939.3124534","type":"proceedings-article","created":{"date-parts":[[2017,10,4]],"date-time":"2017-10-04T18:06:06Z","timestamp":1507140366000},"page":"123-135","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":54,"title":["Beyond the socket"],"prefix":"10.1145","author":[{"given":"Ugljesa","family":"Milic","sequence":"first","affiliation":[{"name":"Barcelona Supercomputing Center (BSC) and Universitat Polit\u00e8cnica de Catalunya (UPC)"}]},{"given":"Oreste","family":"Villa","sequence":"additional","affiliation":[{"name":"NVIDIA"}]},{"given":"Evgeny","family":"Bolotin","sequence":"additional","affiliation":[{"name":"NVIDIA"}]},{"given":"Akhil","family":"Arunkumar","sequence":"additional","affiliation":[{"name":"Arizona State University"}]},{"given":"Eiman","family":"Ebrahimi","sequence":"additional","affiliation":[{"name":"NVIDIA"}]},{"given":"Aamer","family":"Jaleel","sequence":"additional","affiliation":[{"name":"NVIDIA"}]},{"given":"Alex","family":"Ramirez","sequence":"additional","affiliation":[{"name":"Google"}]},{"given":"David","family":"Nellans","sequence":"additional","affiliation":[{"name":"NVIDIA"}]}],"member":"320","published-online":{"date-parts":[[2017,10,14]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2016.7446089"},{"key":"e_1_3_2_1_2_1","unstructured":"AMD Inc. 2017. AMD's Infinity Fabric Detailed. http:\/\/wccftech.com\/amds-infinity-fabric-detailed\/. (2017). {Online; accessed 2017-04-04}.  AMD Inc. 2017. AMD's Infinity Fabric Detailed. http:\/\/wccftech.com\/amds-infinity-fabric-detailed\/. (2017). {Online; accessed 2017-04-04}."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080231"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/2807591.2807611"},{"key":"e_1_3_2_1_5_1","unstructured":"P. Bright. 2016. Moore's Law Really is Dead This Time. http:\/\/arstechnica.com\/information-technology\/2016\/02\/moores-law-really-is-dead-this-time. (2016). {Online; accessed 2017-04-04}.  P. Bright. 2016. Moore's Law Really is Dead This Time. http:\/\/arstechnica.com\/information-technology\/2016\/02\/moores-law-really-is-dead-this-time. (2016). {Online; accessed 2017-04-04}."},{"key":"e_1_3_2_1_6_1","volume-title":"PCI Express Switches. https:\/\/www.broadcom.com\/products\/pcie-switches-bridges\/pcie-switches\/. (2017). {Online","year":"2017","unstructured":"Broadcom. 2017. PCI Express Switches. https:\/\/www.broadcom.com\/products\/pcie-switches-bridges\/pcie-switches\/. (2017). {Online ; accessed 2017 -07-10}. Broadcom. 2017. PCI Express Switches. https:\/\/www.broadcom.com\/products\/pcie-switches-bridges\/pcie-switches\/. (2017). {Online; accessed 2017-07-10}."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/2751205.2751218"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/1274971.1275005"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"e_1_3_2_1_10_1","unstructured":"INTEL Corporation. 2004. Intel Xeon Processor with 533 MHz Front Side Bus at 2 GHz to 3.20 GHz. http:\/\/download.intel.com\/support\/processors\/xeon\/sb\/25213506.pdf. (2004). {Online; accessed 2017-04-04}.  INTEL Corporation. 2004. Intel Xeon Processor with 533 MHz Front Side Bus at 2 GHz to 3.20 GHz. http:\/\/download.intel.com\/support\/processors\/xeon\/sb\/25213506.pdf. (2004). {Online; accessed 2017-04-04}."},{"key":"e_1_3_2_1_11_1","unstructured":"INTEL Corporation. 2007. The Xeon X5365. http:\/\/ark.intel.com\/products\/30702\/Intel-Xeon-Processor-X5365-8M-Cache-3_00-GHz-1333-MHz-FSB. (2007). {Online; accessed 2016-08-19}.  INTEL Corporation. 2007. The Xeon X5365. http:\/\/ark.intel.com\/products\/30702\/Intel-Xeon-Processor-X5365-8M-Cache-3_00-GHz-1333-MHz-FSB. (2007). {Online; accessed 2016-08-19}."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2017.38"},{"key":"e_1_3_2_1_13_1","volume-title":"Computer Architecture: A Quantitative Approach","author":"Hennessy J. L.","year":"2011","unstructured":"J. L. Hennessy and D. A. Patterson . 2011 . Computer Architecture: A Quantitative Approach ( 5 th ed.). Elsevier . J. L. Hennessy and D. A. Patterson. 2011. Computer Architecture: A Quantitative Approach (5th ed.). Elsevier.","edition":"5"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"crossref","unstructured":"A. Herdrich E. Verplanke P. Autee R. Illikkal C. Gianos R. Singhal and R. Iyer. 2016. Cache QoS: From Concept to Reality in the Intel Xeon Processor E5-2600 v3 Product Family. HPCA (2016).  A. Herdrich E. Verplanke P. Autee R. Illikkal C. Gianos R. Singhal and R. Iyer. 2016. Cache QoS: From Concept to Reality in the Intel Xeon Processor E5-2600 v3 Product Family. HPCA (2016).","DOI":"10.1109\/HPCA.2016.7446102"},{"key":"e_1_3_2_1_15_1","unstructured":"HSA Fondation. 2016. HSA Platform System Architecture Specification 1.1. http:\/\/www.hsafoundation.com\/?ddownload=5114. (2016). {Online; accessed 2017-13-06}.  HSA Fondation. 2016. HSA Platform System Architecture Specification 1.1. http:\/\/www.hsafoundation.com\/?ddownload=5114. (2016). {Online; accessed 2017-13-06}."},{"key":"e_1_3_2_1_16_1","unstructured":"SK Hynix. 2009. Hynix GDDR5 SGRAM Part H5GQ1H24AFR Datasheet Revision 1.0. https:\/\/www.skhynix.com\/eolproducts.view.do?pronm=GDDR5+SDRAM&srnm=H5GQ1H24AFR&rk=26&rc=graphics. &rk=26&rc=graphics. &rc=graphics. &rk=26&rc=graphics. &rc=graphics. &rc=graphics. (2009). {Online; accessed 2017-04-04}.  SK Hynix. 2009. Hynix GDDR5 SGRAM Part H5GQ1H24AFR Datasheet Revision 1.0. https:\/\/www.skhynix.com\/eolproducts.view.do?pronm=GDDR5+SDRAM&srnm=H5GQ1H24AFR&rk=26&rc=graphics. &rk=26&rc=graphics. &rc=graphics. &rk=26&rc=graphics. &rc=graphics. &rc=graphics. (2009). {Online; accessed 2017-04-04}."},{"key":"e_1_3_2_1_17_1","volume-title":"http:\/\/www.hypertransport.org\/ht-3-1-link-spec. (2010). {Online","author":"HyperTransport Consortium","year":"2017","unstructured":"HyperTransport Consortium . 2010. HyperTransport 3.1 Specification . http:\/\/www.hypertransport.org\/ht-3-1-link-spec. (2010). {Online ; accessed 2017 -04-04}. HyperTransport Consortium. 2010. HyperTransport 3.1 Specification. http:\/\/www.hypertransport.org\/ht-3-1-link-spec. (2010). {Online; accessed 2017-04-04}."},{"key":"e_1_3_2_1_18_1","unstructured":"IBM. 2011. IBM zEnterprise 196 Technical Guide. http:\/\/www.redbooks.ibm.com\/redbooks\/pdfs\/sg247833.pdf. (2011). {Online; accessed 2017-04-04}.   IBM. 2011. IBM zEnterprise 196 Technical Guide. http:\/\/www.redbooks.ibm.com\/redbooks\/pdfs\/sg247833.pdf. (2011). {Online; accessed 2017-04-04}."},{"key":"e_1_3_2_1_19_1","unstructured":"IBM. 2012. IBM Power Systems Deep Dive. http:\/\/www-05.ibm.com\/cz\/events\/febannouncement2012\/pdf\/power_architecture.pdf. (2012). {Online; accessed 2017-04-04}.  IBM. 2012. IBM Power Systems Deep Dive. http:\/\/www-05.ibm.com\/cz\/events\/febannouncement2012\/pdf\/power_architecture.pdf. (2012). {Online; accessed 2017-04-04}."},{"key":"e_1_3_2_1_20_1","unstructured":"AMD Inc. 2012. AMD Server Solutions Playbook. http:\/\/www.amd.com\/Documents\/AMD_Opteron_ServerPlaybook.pdf. (2012). {Online; accessed 2017-04-04}.  AMD Inc. 2012. AMD Server Solutions Playbook. http:\/\/www.amd.com\/Documents\/AMD_Opteron_ServerPlaybook.pdf. (2012). {Online; accessed 2017-04-04}."},{"key":"e_1_3_2_1_21_1","unstructured":"INTEL Corporation. 2009. An Introduction to the Intel QuickPath Interconnect. http:\/\/www.intel.com\/content\/www\/us\/en\/io\/quickpath-technology\/quick-path-interconnect-introduction-paper.html. (2009). {Online; accessed 2017-04-04}.  INTEL Corporation. 2009. An Introduction to the Intel QuickPath Interconnect. http:\/\/www.intel.com\/content\/www\/us\/en\/io\/quickpath-technology\/quick-path-interconnect-introduction-paper.html. (2009). {Online; accessed 2017-04-04}."},{"key":"e_1_3_2_1_22_1","volume":"200","author":"Jaleel A.","unstructured":"A. Jaleel , W. Hasenplaugh , M. Qureshi , J. Sebot , Jr S. Steely , and J. Emer. 200 8. Adaptive Insertion Policies for Managing Shared Caches. Proceedings of MICRO (Oct 2008). A. Jaleel, W. Hasenplaugh, M. Qureshi, J. Sebot, Jr S. Steely, and J. Emer. 2008. Adaptive Insertion Policies for Managing Shared Caches. Proceedings of MICRO (Oct 2008).","journal-title":"J. Emer."},{"key":"e_1_3_2_1_23_1","unstructured":"JEDEC. 2015. High Bandwidth Memory(HBM) DRAM - JESD235. http:\/\/www.jedec.org\/standards-documents\/results\/jesd235. (2015). {Online; accessed 2017-04-04}.  JEDEC. 2015. High Bandwidth Memory(HBM) DRAM - JESD235. http:\/\/www.jedec.org\/standards-documents\/results\/jesd235. (2015). {Online; accessed 2017-04-04}."},{"key":"e_1_3_2_1_24_1","volume-title":"https:\/\/www.khronos.org\/opencl\/. (2016). {Online","author":"KHRONOS GROUP.","year":"2017","unstructured":"KHRONOS GROUP. 2016. OpenCL 2.2 API Specification (Provisional). https:\/\/www.khronos.org\/opencl\/. (2016). {Online ; accessed 2017 -04-04}. KHRONOS GROUP. 2016. OpenCL 2.2 API Specification (Provisional). https:\/\/www.khronos.org\/opencl\/. (2016). {Online; accessed 2017-04-04}."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/2038037.1941591"},{"key":"e_1_3_2_1_26_1","volume-title":"The World's #1 Open Science Super Computer. https:\/\/www.olcf.ornl.gov\/titan\/. (2013). {Online","author":"Oak Ridge National Laboratory. 2013. Titan","year":"2017","unstructured":"Oak Ridge National Laboratory. 2013. Titan : The World's #1 Open Science Super Computer. https:\/\/www.olcf.ornl.gov\/titan\/. (2013). {Online ; accessed 2017 -04-04}. Oak Ridge National Laboratory. 2013. Titan : The World's #1 Open Science Super Computer. https:\/\/www.olcf.ornl.gov\/titan\/. (2013). {Online; accessed 2017-04-04}."},{"key":"e_1_3_2_1_27_1","unstructured":"Andrew Lavin. 2015. Fast Algorithms for Convolutional Neural Networks. http:\/\/arxiv.org\/abs\/1509.09308. (2015). {Online; accessed 2017-04-04}.  Andrew Lavin. 2015. Fast Algorithms for Convolutional Neural Networks. http:\/\/arxiv.org\/abs\/1509.09308. (2015). {Online; accessed 2017-04-04}."},{"key":"e_1_3_2_1_28_1","volume-title":"https:\/\/asc.llnl.gov\/CORAL-benchmarks\/. (2014). {Online","author":"Lawerence Livermore National Laboratory. 2014. CORAL Benchmarks.","year":"2017","unstructured":"Lawerence Livermore National Laboratory. 2014. CORAL Benchmarks. https:\/\/asc.llnl.gov\/CORAL-benchmarks\/. (2014). {Online ; accessed 2017 -04-04}. Lawerence Livermore National Laboratory. 2014. CORAL Benchmarks. https:\/\/asc.llnl.gov\/CORAL-benchmarks\/. (2014). {Online; accessed 2017-04-04}."},{"key":"e_1_3_2_1_29_1","volume-title":"https:\/\/asc.llnl.gov\/coral-info. (2016). {Online","author":"Lawerence Livermore National Laboratory. 2016. CORAL\/Sierra.","year":"2017","unstructured":"Lawerence Livermore National Laboratory. 2016. CORAL\/Sierra. https:\/\/asc.llnl.gov\/coral-info. (2016). {Online ; accessed 2017 -04-04}. Lawerence Livermore National Laboratory. 2016. CORAL\/Sierra. https:\/\/asc.llnl.gov\/coral-info. (2016). {Online; accessed 2017-04-04}."},{"key":"e_1_3_2_1_30_1","volume-title":"Transparent CPU-GPU Collaboration for Data-Parallel Kernels on Heterogeneous Systems. In International Conference on Parallel Architectures and Compilation Techniques (PACT). IEEE.","author":"Lee Janghaeng","year":"2013","unstructured":"Janghaeng Lee , Mehrzad Samadi , Yongjun Park , and Scott Mahlke . 2013 . Transparent CPU-GPU Collaboration for Data-Parallel Kernels on Heterogeneous Systems. In International Conference on Parallel Architectures and Compilation Techniques (PACT). IEEE. Janghaeng Lee, Mehrzad Samadi, Yongjun Park, and Scott Mahlke. 2013. Transparent CPU-GPU Collaboration for Data-Parallel Kernels on Heterogeneous Systems. In International Conference on Parallel Architectures and Compilation Techniques (PACT). IEEE."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"crossref","unstructured":"D. Li M. Rhu D. R. Johnson M. O'Connor M. Erez D. Burger D. S. Fussell and S. W. Keckler. 2015. Priority-Based Cache Allocation in Throughput Processors. HPCA (2015).  D. Li M. Rhu D. R. Johnson M. O'Connor M. Erez D. Burger D. S. Fussell and S. W. Keckler. 2015. Priority-Based Cache Allocation in Throughput Processors. HPCA (2015).","DOI":"10.1109\/HPCA.2015.7056024"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.5555\/3014904.3015007"},{"key":"e_1_3_2_1_33_1","volume-title":"Carrizo: A High Performance, Energy Efficient 28 nm APU. Journal of Solid-State Circuits","author":"Munger Benjamin","year":"2016","unstructured":"Benjamin Munger , David Akeson , Srikanth Arekapudi , Tom Burd , Harry R Fair , Jim Farrell , Dave Johnson , Guhan Krishnan , Hugh McIntyre , Edward McLellan , 2016 . Carrizo: A High Performance, Energy Efficient 28 nm APU. Journal of Solid-State Circuits (2016). Benjamin Munger, David Akeson, Srikanth Arekapudi, Tom Burd, Harry R Fair, Jim Farrell, Dave Johnson, Guhan Krishnan, Hugh McIntyre, Edward McLellan, et al. 2016. Carrizo: A High Performance, Energy Efficient 28 nm APU. Journal of Solid-State Circuits (2016)."},{"key":"e_1_3_2_1_34_1","volume-title":"GPU Accelerated Deep Learning. https:\/\/developer.nvidia.com\/cudnn. ({n. d.}). {Online","author":"NVIDIA.","year":"2017","unstructured":"NVIDIA. {n. d.}. NVIDIA cuDNN , GPU Accelerated Deep Learning. https:\/\/developer.nvidia.com\/cudnn. ({n. d.}). {Online ; accessed 2017 -04-04}. NVIDIA. {n. d.}. NVIDIA cuDNN, GPU Accelerated Deep Learning. https:\/\/developer.nvidia.com\/cudnn. ({n. d.}). {Online; accessed 2017-04-04}."},{"key":"e_1_3_2_1_35_1","unstructured":"NVIDIA. {n. d.}. The World's First AI Supercomputer in a Box. http:\/\/www.nvidia.com\/object\/deep-learning-system.html. ({n. d.}). {Online; accessed 2017-04-04}.  NVIDIA. {n. d.}. The World's First AI Supercomputer in a Box. http:\/\/www.nvidia.com\/object\/deep-learning-system.html. ({n. d.}). {Online; accessed 2017-04-04}."},{"key":"e_1_3_2_1_36_1","unstructured":"NVIDIA. 2011. Multi-GPU Programming. http:\/\/www.nvidia.com\/docs\/IO\/116711\/sc11-multi-gpu.pdf. (2011). {Online; accessed 2017-04-04}.  NVIDIA. 2011. Multi-GPU Programming. http:\/\/www.nvidia.com\/docs\/IO\/116711\/sc11-multi-gpu.pdf. (2011). {Online; accessed 2017-04-04}."},{"key":"e_1_3_2_1_37_1","unstructured":"NVIDIA. 2013. Unified Memory in CUDA 6. http:\/\/devblogs.nvidia.com\/parallelforall\/unified-memory-in-cuda-6\/. (2013). {Online; accessed 2017-04-04}.  NVIDIA. 2013. Unified Memory in CUDA 6. http:\/\/devblogs.nvidia.com\/parallelforall\/unified-memory-in-cuda-6\/. (2013). {Online; accessed 2017-04-04}."},{"key":"e_1_3_2_1_38_1","unstructured":"NVIDIA. 2014. Compute Unified Device Architecture. http:\/\/www.nvidia.com\/object\/cuda_home_new.html. (2014). {Online; accessed 2017-04-04}.  NVIDIA. 2014. Compute Unified Device Architecture. http:\/\/www.nvidia.com\/object\/cuda_home_new.html. (2014). {Online; accessed 2017-04-04}."},{"key":"e_1_3_2_1_39_1","unstructured":"NVIDIA. 2014. NVIDIA Launches World's First High-Speed GPU Interconnect Helping Pave the Way to Exascale Computing. http:\/\/nvidianews.nvidia.com\/news\/nvidia-launches-world-s-first-high-speed-gpu\/interconnect-helping-pave-theway-to-exascale-computing. (2014). {Online; accessed 2017-04-04}.  NVIDIA. 2014. NVIDIA Launches World's First High-Speed GPU Interconnect Helping Pave the Way to Exascale Computing. http:\/\/nvidianews.nvidia.com\/news\/nvidia-launches-world-s-first-high-speed-gpu\/interconnect-helping-pave-theway-to-exascale-computing. (2014). {Online; accessed 2017-04-04}."},{"key":"e_1_3_2_1_40_1","unstructured":"NVIDIA. 2015. CUDA C Programming Guild v7.0. http:\/\/docs.nvidia.com\/cuda\/cuda-c-programming-guide\/index.html. (2015). {Online; accessed 2017-04-04}.  NVIDIA. 2015. CUDA C Programming Guild v7.0. http:\/\/docs.nvidia.com\/cuda\/cuda-c-programming-guide\/index.html. (2015). {Online; accessed 2017-04-04}."},{"key":"e_1_3_2_1_41_1","volume-title":"Inside Pascal: NVIDIA's Newest Computing Platform. https:\/\/devblogs.nvidia.com\/parallelforall\/inside-pascal.","author":"NVIDIA.","year":"2016","unstructured":"NVIDIA. 2016 . Inside Pascal: NVIDIA's Newest Computing Platform. https:\/\/devblogs.nvidia.com\/parallelforall\/inside-pascal. (2016). {Online; accessed 2017-04-04}. NVIDIA. 2016. Inside Pascal: NVIDIA's Newest Computing Platform. https:\/\/devblogs.nvidia.com\/parallelforall\/inside-pascal. (2016). {Online; accessed 2017-04-04}."},{"key":"e_1_3_2_1_42_1","volume-title":"MPI Solutions for GPUs. https:\/\/developer.nvidia.com\/mpi-solutions-gpus. (2016). {Online","author":"NVIDIA.","year":"2017","unstructured":"NVIDIA. 2016. MPI Solutions for GPUs. https:\/\/developer.nvidia.com\/mpi-solutions-gpus. (2016). {Online ; accessed 2017 -04-04}. NVIDIA. 2016. MPI Solutions for GPUs. https:\/\/developer.nvidia.com\/mpi-solutions-gpus. (2016). {Online; accessed 2017-04-04}."},{"key":"e_1_3_2_1_43_1","volume-title":"NVIDIA Tesla P100. https:\/\/images.nvidia.com\/content\/pdf\/tesla\/whitepaper\/pascal-architecture-whitepaper.pdf. (2016). {Online","author":"NVIDIA.","year":"2017","unstructured":"NVIDIA. 2016. NVIDIA Tesla P100. https:\/\/images.nvidia.com\/content\/pdf\/tesla\/whitepaper\/pascal-architecture-whitepaper.pdf. (2016). {Online ; accessed 2017 -04-04}. NVIDIA. 2016. NVIDIA Tesla P100. https:\/\/images.nvidia.com\/content\/pdf\/tesla\/whitepaper\/pascal-architecture-whitepaper.pdf. (2016). {Online; accessed 2017-04-04}."},{"key":"e_1_3_2_1_44_1","unstructured":"NVIDIA. 2017. Scalable Link Interconnect. http:\/\/www.nvidia.in\/object\/sli-technology-overview-in.html. (2017). {Online; accessed 2017-07-10}.  NVIDIA. 2017. Scalable Link Interconnect. http:\/\/www.nvidia.in\/object\/sli-technology-overview-in.html. (2017). {Online; accessed 2017-07-10}."},{"key":"e_1_3_2_1_45_1","volume-title":"Microarchitectural Performance Characterization of Irregular GPU Kernels. In International Symposium on Workload Characterization (IISWC).","author":"O'Neil M. A.","unstructured":"M. A. O'Neil and M. Burtscher . 2014 . Microarchitectural Performance Characterization of Irregular GPU Kernels. In International Symposium on Workload Characterization (IISWC). M. A. O'Neil and M. Burtscher. 2014. Microarchitectural Performance Characterization of Irregular GPU Kernels. In International Symposium on Workload Characterization (IISWC)."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/2786763.2694346"},{"key":"e_1_3_2_1_47_1","volume-title":"PCI Express Base Specification Revision 3.1a. https:\/\/members.pcisig.com\/wg\/PCI-SIG\/document\/download\/8257. (2015). {Online","author":"PCI-SIG.","year":"2017","unstructured":"PCI-SIG. 2015. PCI Express Base Specification Revision 3.1a. https:\/\/members.pcisig.com\/wg\/PCI-SIG\/document\/download\/8257. (2015). {Online ; accessed 2017 -07-10}. PCI-SIG. 2015. PCI Express Base Specification Revision 3.1a. https:\/\/members.pcisig.com\/wg\/PCI-SIG\/document\/download\/8257. (2015). {Online; accessed 2017-07-10}."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/HOTCHIPS.2014.7478832"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/2540708.2540747"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/2884045.2884052"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2006.49"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/1152154.1152160"},{"key":"e_1_3_2_1_53_1","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very Deep Convolutional Networks for Large-Scale Image Recognition. http:\/\/arxiv.org\/abs\/1409.1556. (2014). {Online; accessed 2017-04-04}.  Karen Simonyan and Andrew Zisserman. 2014. Very Deep Convolutional Networks for Large-Scale Image Recognition. http:\/\/arxiv.org\/abs\/1409.1556. (2014). {Online; accessed 2017-04-04}."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/2830772.2830821"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2013.6522351"},{"key":"e_1_3_2_1_56_1","volume-title":"Yamamura","author":"Spence John R.","year":"1985","unstructured":"John R. Spence and Michael M . Yamamura . 1985 . Clocked Tri-State Driver Circuit . https:\/\/www.google.com\/patents\/US4504745. (1985). John R. Spence and Michael M. Yamamura. 1985. Clocked Tri-State Driver Circuit. https:\/\/www.google.com\/patents\/US4504745. (1985)."},{"key":"e_1_3_2_1_57_1","volume-title":"Enabling Preemptive Multiprogramming on GPUs. In ACM\/IEEE International Symposium on Computer Architecture (ISCA).","author":"Tanasic Ivan","year":"2014","unstructured":"Ivan Tanasic , Isaac Gelado , Javier Cabezas , Alex Ramirez , Nacho Navarro , and Mateo Valero . 2014 . Enabling Preemptive Multiprogramming on GPUs. In ACM\/IEEE International Symposium on Computer Architecture (ISCA). Ivan Tanasic, Isaac Gelado, Javier Cabezas, Alex Ramirez, Nacho Navarro, and Mateo Valero. 2014. Enabling Preemptive Multiprogramming on GPUs. In ACM\/IEEE International Symposium on Computer Architecture (ISCA)."},{"key":"e_1_3_2_1_58_1","unstructured":"Mellanox Technologies. 2015. Switch-IB 2 EDR Switch Silicon - World's First Smart Switch. http:\/\/www.mellanox.com\/related-docs\/prod_silicon\/PB_SwitchIB2_EDR_Switch_Silicon.pdf. (2015). {Online; accessed 2017-04-04}.  Mellanox Technologies. 2015. Switch-IB 2 EDR Switch Silicon - World's First Smart Switch. http:\/\/www.mellanox.com\/related-docs\/prod_silicon\/PB_SwitchIB2_EDR_Switch_Silicon.pdf. (2015). {Online; accessed 2017-04-04}."},{"key":"e_1_3_2_1_59_1","first-page":"X","volume":"201","unstructured":"Mellanox Technologies. 201 6. Connect X - 4 VPI Single and Dual Port QSFP28 Adapter Card User Manual. http:\/\/www.mellanox.com\/related-docs\/user_manuals\/ConnectX-4_VPI_Single_and_Dual_QSFP28_Port_Adapter_Card_User_Manual.pdf. (2016). {Online; accessed 2017-04-04}. Mellanox Technologies. 2016. ConnectX-4 VPI Single and Dual Port QSFP28 Adapter Card User Manual. http:\/\/www.mellanox.com\/related-docs\/user_manuals\/ConnectX-4_VPI_Single_and_Dual_QSFP28_Port_Adapter_Card_User_Manual.pdf. (2016). {Online; accessed 2017-04-04}.","journal-title":"Mellanox Technologies."},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/ETSYM.2010.5512785"},{"key":"e_1_3_2_1_61_1","unstructured":"C. G. Willard A. Snell and M. Feldman. 2015. HPC Application Support for GPU Computing. http:\/\/www.intersect360.com\/industry\/reports.php?id=131. (2015). {Online; accessed 2017-04-04}.  C. G. Willard A. Snell and M. Feldman. 2015. HPC Application Support for GPU Computing. http:\/\/www.intersect360.com\/industry\/reports.php?id=131. (2015). {Online; accessed 2017-04-04}."},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/2786572.2786596"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1145\/2996190"}],"event":{"name":"MICRO-50: The 50th Annual IEEE\/ACM International Symposium on Microarchitecture","location":"Cambridge Massachusetts","acronym":"MICRO-50","sponsor":["SIGMICRO ACM Special Interest Group on Microarchitectural Research and Processing","IEEE-CS\\DATC IEEE Computer Society"]},"container-title":["Proceedings of the 50th Annual IEEE\/ACM International Symposium on Microarchitecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3123939.3124534","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3123939.3124534","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T03:30:31Z","timestamp":1750217431000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3123939.3124534"}},"subtitle":["NUMA-aware GPUs"],"short-title":[],"issued":{"date-parts":[[2017,10,14]]},"references-count":63,"alternative-id":["10.1145\/3123939.3124534","10.1145\/3123939"],"URL":"https:\/\/doi.org\/10.1145\/3123939.3124534","relation":{},"subject":[],"published":{"date-parts":[[2017,10,14]]},"assertion":[{"value":"2017-10-14","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}