{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,27]],"date-time":"2026-02-27T03:47:38Z","timestamp":1772164058239,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":53,"publisher":"ACM","license":[{"start":{"date-parts":[[2017,6,24]],"date-time":"2017-06-24T00:00:00Z","timestamp":1498262400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2017,6,24]]},"DOI":"10.1145\/3079856.3080231","type":"proceedings-article","created":{"date-parts":[[2017,6,15]],"date-time":"2017-06-15T15:40:01Z","timestamp":1497541201000},"page":"320-332","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":118,"title":["MCM-GPU"],"prefix":"10.1145","author":[{"given":"Akhil","family":"Arunkumar","sequence":"first","affiliation":[{"name":"Arizona State University"}]},{"given":"Evgeny","family":"Bolotin","sequence":"additional","affiliation":[{"name":"NVIDIA"}]},{"given":"Benjamin","family":"Cho","sequence":"additional","affiliation":[{"name":"University of Texas at Austin"}]},{"given":"Ugljesa","family":"Milic","sequence":"additional","affiliation":[{"name":"Barcelona Supercomputing Center \/ Universitat Politecnica de Catalunya"}]},{"given":"Eiman","family":"Ebrahimi","sequence":"additional","affiliation":[{"name":"NVIDIA"}]},{"given":"Oreste","family":"Villa","sequence":"additional","affiliation":[{"name":"NVIDIA"}]},{"given":"Aamer","family":"Jaleel","sequence":"additional","affiliation":[{"name":"NVIDIA"}]},{"given":"Carole-Jean","family":"Wu","sequence":"additional","affiliation":[{"name":"Arizona State University"}]},{"given":"David","family":"Nellans","sequence":"additional","affiliation":[{"name":"NVIDIA"}]}],"member":"320","published-online":{"date-parts":[[2017,6,24]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Xenos: XBOX360 GPU. (2005)","unstructured":"2005. Xenos: XBOX360 GPU. (2005). http:\/\/fileadmin.cs.lth.se\/cs\/Personal\/MichaelDoggett\/talks\/eg05-xenos-doggett.pdf Accessed: 2016-08-19."},{"key":"e_1_3_2_1_2_1","volume-title":"The Xeon X5365. (2007)","unstructured":"2007. The Xeon X5365. (2007). http:\/\/ark.intel.com\/products\/30702\/Intel-Xeon-Processor-X5365-8M-Cache-300-GHz-1333-MHz-FSB Accessed: 2016-08-19."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.5555\/2132599"},{"key":"e_1_3_2_1_4_1","volume-title":"AMD Server Solutions Playbook. (2012)","unstructured":"2012. AMD Server Solutions Playbook. (2012). http:\/\/www.amd.com\/Documents\/AMD_Opteron_ServerPlaybook.pdf Accessed: 2016-08-19."},{"key":"e_1_3_2_1_5_1","volume-title":"IBM Power Systems Deep Dive. (2012)","unstructured":"2012. IBM Power Systems Deep Dive. (2012). http:\/\/www-05.ibm.com\/cz\/events\/febannouncement2012\/pdf\/power_architecture.pdf Accessed: 2016-08-19."},{"key":"e_1_3_2_1_6_1","volume-title":"https:\/\/asc.llnl.gov\/CORAL-benchmarks\/","author":"Benchmarks CORAL","year":"2014","unstructured":"2014. CORAL Benchmarks. (2014). https:\/\/asc.llnl.gov\/CORAL-benchmarks\/"},{"key":"e_1_3_2_1_7_1","volume-title":"Intel Delays 10nm to","year":"2017","unstructured":"2015. Intel Delays 10nm to 2017. (2015). http:\/\/www.extremetech.com\/computing\/210050-intel-confirms-10nm-delayed-to-2017-will-introduce-kaby-\\lake-at-14nm-to-fill-gap"},{"key":"e_1_3_2_1_8_1","unstructured":"2015. International Technology Roadmap for Semiconductors 2.0. (2015). http:\/\/www.itrs2.net\/itrs-reports.html"},{"key":"e_1_3_2_1_9_1","volume-title":"Switch-IB 2 EDR Switch Silicon - World's First Smart Switch. (2015)","unstructured":"2015. Switch-IB 2 EDR Switch Silicon - World's First Smart Switch. (2015). http:\/\/www.mellanox.com\/related-docs\/prod_silicon\/PB_SwitchIB2_EDR_Switch_Silicon.pdf Accessed: 2016-06-20."},{"key":"e_1_3_2_1_10_1","unstructured":"2015. TESLA K80 GPU ACCELERATOR. (2015). https:\/\/images.nvidia.com\/content\/pdf\/kepler\/Tesla-K80-BoardSpec-07317-001-v05.pdf Accessed: 2016-06-20."},{"key":"e_1_3_2_1_11_1","unstructured":"2015. The Compute Architecture of Intel Processor Graphics Gen8. (2015). https:\/\/software.intel.com Accessed: 2016-08-19."},{"key":"e_1_3_2_1_12_1","volume-title":"TOP500 Shows Growing Momentum for Accelerators. (2015)","unstructured":"2015. TOP500 Shows Growing Momentum for Accelerators. (2015). http:\/\/insidehpc.com\/2015\/11\/top500-shows-growing-momentum-for-accelerators\/ Accessed: 2016-06-20."},{"key":"e_1_3_2_1_13_1","volume-title":"ConnectX-4 VPI Single and Dual Port QSFP28 Adapter Card User Manual. (2016)","unstructured":"2016. ConnectX-4 VPI Single and Dual Port QSFP28 Adapter Card User Manual. (2016). http:\/\/www.mellanox.com\/related-docs\/user_manuals\/ConnectX-4_VPI_Single_and_Dual_QSFP28_Port_Adapter_Card_User_Manual.pdf Accessed: 2016-06-20."},{"key":"e_1_3_2_1_14_1","unstructured":"2016. Inside Pascal: NVIDIA's Newest Computing Platform. (2016). https:\/\/devblogs.nvidia.com\/parallelforall\/inside-pascal Accessed: 2016-06-20."},{"key":"e_1_3_2_1_15_1","volume-title":"GPU Accelerated Deep Learning.","year":"2016","unstructured":"2016. NVIDIA cuDNN, GPU Accelerated Deep Learning. (2016). https:\/\/developer.nvidia.com\/cudnn Accessed: 2016-11-17."},{"key":"e_1_3_2_1_16_1","volume-title":"NVIDIA NVLink High-Speed Interconnect. (2016)","unstructured":"2016. NVIDIA NVLink High-Speed Interconnect. (2016). http:\/\/www.nvidia.com\/object\/nvlink.html Accessed: 2016-06-20."},{"key":"e_1_3_2_1_17_1","volume-title":"The New NVIDIA Pascal Architecture. (2016)","unstructured":"2016. The New NVIDIA Pascal Architecture. (2016). http:\/\/www.nvidia.com\/object\/gpu-architecture.html Accessed: 2016-06-20."},{"key":"e_1_3_2_1_18_1","unstructured":"2016. The TWINSCAN NXT:1950i Dual-Stage Immersion Lithography System. (2016). https:\/\/www.asml.com\/products\/systems\/twinscan-nxt\/twinscan-nxt1950i\/en\/s46772?dfp_product_id=822 Accessed: 2016-11-18."},{"key":"e_1_3_2_1_19_1","unstructured":"2016. Titan: The world's #1 Open Science Super Computer. (2016). https:\/\/www.olcf.ornl.gov\/titan\/"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/2807591.2807611"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/1854273.1854350"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/74850.74854"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/2751205.2751218"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2011.50"},{"key":"e_1_3_2_1_26_1","volume-title":"Proceedings of the IEEE International Symposium on Parallel Distributed Processing (IPDPS '10)","author":"Chen Long","unstructured":"Long Chen, Oreste Villa, Sriram Krishnamoorthy, and Guang R. Gao. 2010. Dynamic load balancing on single- and multi-GPU systems. In Proceedings of the IEEE International Symposium on Parallel Distributed Processing (IPDPS '10). IEEE, Atlanta, GA, USA, 1--12."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/2451116.2451157"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/HOTCHIPS.2007.7482489"},{"key":"e_1_3_2_1_29_1","unstructured":"Michael Feldman Christopher G. Willard and Addison Snell. 2015. HPC Application Support for GPU Computing. (2015). http:\/\/www.intersect360.com\/industry\/reports.php?id=131"},{"key":"e_1_3_2_1_30_1","unstructured":"Mitsuya Ishida. 2014. Kyocera APX - An Advanced Organic Technology for 2.5D Interposers. (2014). https:\/\/www.ectc.net Accessed: 2016-06-20."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/2830772.2830808"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2011.89"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/1941553.1941591"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/109625.109639"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.435"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.5555\/2523721.2523756"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2014.6835937"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.1993.112"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/2259016.2259046"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2013.6487788"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/2897937.2898103"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/2927964.2927975"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2014.6983052"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2007.908692"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2013.2279053"},{"key":"e_1_3_2_1_46_1","volume-title":"https:\/\/www.openfabrics.org\/downloads\/Media\/Monterey_2011\/Apr5_pcie%20gen3.pdf Accessed: 2016-06-20","author":"Sharma Debendra D.","year":"2014","unstructured":"Debendra D. Sharma. 2014. PCI Express 3.0 Features and Requirements Gathering for beyond. (2014). https:\/\/www.openfabrics.org\/downloads\/Media\/Monterey_2011\/Apr5_pcie%20gen3.pdf Accessed: 2016-06-20."},{"key":"e_1_3_2_1_47_1","volume-title":"Very Deep Convolutional Networks for Large-Scale Image Recognition. ArXiv e-prints (Sept","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very Deep Convolutional Networks for Large-Scale Image Recognition. ArXiv e-prints (Sept. 2014). arXiv:cs.CV\/1409.1556"},{"key":"e_1_3_2_1_48_1","volume-title":"Smith and Kazuaki Suzuki","author":"Bruce","year":"2007","unstructured":"Bruce W. Smith and Kazuaki Suzuki. 2007. Microlithography: Science and Technology, Second Edition. https:\/\/books.google.com\/books?id=_hTLDCeIYxoC"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2009.5161065"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2011.102"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/1272996.1273004"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001199"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/582034.582067"}],"event":{"name":"ISCA '17: The 44th Annual International Symposium on Computer Architecture","location":"Toronto ON Canada","acronym":"ISCA '17","sponsor":["IEEE IEEE Computer Society Technical Committee on Design Automation","SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 44th Annual International Symposium on Computer Architecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3079856.3080231","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3079856.3080231","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T23:37:15Z","timestamp":1750203435000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3079856.3080231"}},"subtitle":["Multi-Chip-Module GPUs for Continued Performance Scalability"],"short-title":[],"issued":{"date-parts":[[2017,6,24]]},"references-count":53,"alternative-id":["10.1145\/3079856.3080231","10.1145\/3079856"],"URL":"https:\/\/doi.org\/10.1145\/3079856.3080231","relation":{"is-identical-to":[{"id-type":"doi","id":"10.1145\/3140659.3080231","asserted-by":"object"}]},"subject":[],"published":{"date-parts":[[2017,6,24]]},"assertion":[{"value":"2017-06-24","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}