{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,25]],"date-time":"2026-06-25T15:09:35Z","timestamp":1782400175978,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":76,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,10,12]],"date-time":"2019-10-12T00:00:00Z","timestamp":1570838400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,10,12]]},"DOI":"10.1145\/3352460.3358302","type":"proceedings-article","created":{"date-parts":[[2019,10,11]],"date-time":"2019-10-11T11:16:45Z","timestamp":1570792605000},"page":"14-27","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":371,"title":["Simba"],"prefix":"10.1145","author":[{"given":"Yakun Sophia","family":"Shao","sequence":"first","affiliation":[{"name":"NVIDIA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jason","family":"Clemons","sequence":"additional","affiliation":[{"name":"NVIDIA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Rangharajan","family":"Venkatesan","sequence":"additional","affiliation":[{"name":"NVIDIA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Brian","family":"Zimmer","sequence":"additional","affiliation":[{"name":"NVIDIA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Matthew","family":"Fojtik","sequence":"additional","affiliation":[{"name":"NVIDIA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Nan","family":"Jiang","sequence":"additional","affiliation":[{"name":"NVIDIA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ben","family":"Keller","sequence":"additional","affiliation":[{"name":"NVIDIA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Alicia","family":"Klinefelter","sequence":"additional","affiliation":[{"name":"NVIDIA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Nathaniel","family":"Pinckney","sequence":"additional","affiliation":[{"name":"NVIDIA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Priyanka","family":"Raina","sequence":"additional","affiliation":[{"name":"Stanford"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Stephen G.","family":"Tell","sequence":"additional","affiliation":[{"name":"NVIDIA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yanqing","family":"Zhang","sequence":"additional","affiliation":[{"name":"NVIDIA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"William J.","family":"Dally","sequence":"additional","affiliation":[{"name":"NVIDIA\/Stanford"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Joel","family":"Emer","sequence":"additional","affiliation":[{"name":"NVIDIA\/MIT"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"C. Thomas","family":"Gray","sequence":"additional","affiliation":[{"name":"NVIDIA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Brucek","family":"Khailany","sequence":"additional","affiliation":[{"name":"NVIDIA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Stephen W.","family":"Keckler","sequence":"additional","affiliation":[{"name":"NVIDIA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2019,10,12]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001138"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783725"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080231"},{"key":"e_1_3_2_1_4_1","unstructured":"Krste Asanovic Rimas Avizienis Jonathan Bachrach Scott Beamer David Biancolin Christopher Celio Henry Cook Daniel Dabbelt John Hauser Adam Izraelevitz Sagar Karandikar Ben Keller Donggyu Kim John Koenig Yunsup Lee Eric Love Martin Maas Albert Magyar Howard Mao Miquel Moreto Albert Ou David A. Patterson Brian Richards Colin Schmidt Stephen Twigg Huy Vo and Andrew Waterman. 2016. The Rocket Chip Generator. Technical Report UCB\/EECS-2016-17. EECS Department University of California Berkeley. http:\/\/www2.eecs.berkeley.edu\/Pubs\/TechRpts\/2016\/EECS-2016-17.html"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2009.4798260"},{"key":"e_1_3_2_1_6_1","volume-title":"SegNet: A Deep Convolutional Encoder-Decoder Architecture for Image Segmentation. CoRR abs\/1511.00561","author":"Badrinarayanan Vijay","year":"2015","unstructured":"Vijay Badrinarayanan, Alex Kendall, and Roberto Cipolla. 2015. SegNet: A Deep Convolutional Encoder-Decoder Architecture for Image Segmentation. CoRR abs\/1511.00561 (2015). arXiv:1511.00561"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2018.8310173"},{"key":"e_1_3_2_1_8_1","volume-title":"Proceedings of the International Symposium on Microarchitecture (MICRO).","author":"Bradford","unstructured":"Bradford M. Beckmann and David A. Wood. 2004. Managing Wire Delay in Large Chip-Multiprocessor Caches. In Proceedings of the International Symposium on Microarchitecture (MICRO)."},{"key":"e_1_3_2_1_9_1","volume-title":"Proceedings of the International Conference on Parallel Architectures and Compilation Techniques (PACT).","author":"Beckmann Nathan","year":"2013","unstructured":"Nathan Beckmann and Daniel Sanchez. 2013. Jigsaw: Scalable Software-Defined Caches. In Proceedings of the International Conference on Parallel Architectures and Compilation Techniques (PACT)."},{"key":"e_1_3_2_1_10_1","volume-title":"Explaining How a Deep Neural Network Trained with End-to-End Learning Steers a Car. CoRR abs\/1704.07911","author":"Bojarski Mariusz","year":"2017","unstructured":"Mariusz Bojarski, Philip Yeres, Anna Choromanska, Krzysztof Choromanski, Bernhard Firner, Lawrence D. Jackel, and Urs Muller. 2017. Explaining How a Deep Neural Network Trained with End-to-End Learning Steers a Car. CoRR abs\/1704.07911 (2017)."},{"key":"e_1_3_2_1_11_1","volume-title":"Design Automation Conference.","author":"Carloni Luca P.","unstructured":"Luca P. Carloni, Kenneth L. McMillan, Alexander Saldanha, and Alberto L. Sangiovanni-Vincentelli. 1999. A Methodology for Correct-by-construction Latency Insensitive Design. In Design Automation Conference."},{"key":"e_1_3_2_1_12_1","volume-title":"Yuille","author":"Chen Liang-Chieh","year":"2016","unstructured":"Liang-Chieh Chen, George Papandreou, Iasonas Kokkinos, Kevin Murphy, and Alan L. Yuille. 2016. DeepLab: Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs. CoRR abs\/1606.00915 (2016). arXiv:1606.00915"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/2541940.2541967"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.58"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001177"},{"key":"e_1_3_2_1_16_1","volume-title":"Proceedings of the International Symposium on Computer Architecture (ISCA).","author":"Chishti Zeshan","unstructured":"Zeshan Chishti, Michale D. Powell, and T.N. Vijaykumar. 2005. Optimizing Replication, Communication, and Capacity Allocation in CMPs. In Proceedings of the International Symposium on Computer Architecture (ISCA)."},{"key":"e_1_3_2_1_17_1","unstructured":"Eric Chung Jeremy Fowers Kalin Ovtcharov Michael Papamichael Adrian Caulfield Todd Massengill Ming Liu Daniel Lo Shlomi Alkalay Michael Haselman Christian Boehn Oren Firestein Alessandro Forin Kang Su Gatlin Mahdi Ghandi Stephen Heil Kyle Holohan Tamas Juhasz Ratna Kumar Kovvuri Sitaram Lanka Friedel van Megan Dima Mukhortov Prerak Patel Steve Reinhardt Adam Sapek Raja Seera Balaji Sridharan Lisa Woods Philip Yi-Xiao Ritchie Zhao and Doug Burger. 2017. Accelerating Persistent Neural Networks at Datacenter Scale. In HotChips."},{"key":"e_1_3_2_1_18_1","volume-title":"Proceedings of the International Symposia on VLSI Technology and Circuits (VLSI).","author":"Dally William J.","year":"2018","unstructured":"William J. Dally, C. Thomas Gray, John Poulton, Brucek Khailany, John Wilson, and Larry Dennison. 2018. Hardware-Enabled Artifical Intelligence. In Proceedings of the International Symposia on VLSI Technology and Circuits (VLSI)."},{"key":"e_1_3_2_1_19_1","volume-title":"Proceedings of the International Symposium on High-Performance Computer Architecture (HPCA).","author":"Das Reetuparna","unstructured":"Reetuparna Das, Soumya Eachempati, Asit K. Mishra, Vijaykrishnan Narayanan, and Chita R. Das. 2009. Design and Evaluation of A Hierarchical On-chip Interconnect for Next-Generation CMPs. In Proceedings of the International Symposium on High-Performance Computer Architecture (HPCA)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2750389"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2018.8310290"},{"key":"e_1_3_2_1_22_1","volume-title":"Neuflow: A Runtime Reconfigurable Dataflow Processor for Vision. In Computer Vision and Pattern Recognition Workshops (CVPRW).","author":"Farabet Cl\u00e9ment","year":"2011","unstructured":"Cl\u00e9ment Farabet, Berin Martini, Benoit Corda, Polina Akselrod, Eugenio Culurciello, and Yann LeCun. 2011. Neuflow: A Runtime Reconfigurable Dataflow Processor for Vision. In Computer Vision and Pattern Recognition Workshops (CVPRW)."},{"key":"e_1_3_2_1_23_1","volume-title":"International Symposium on Asynchronous Circuits and Systems (ASYNC).","author":"Fojtik Matthew","year":"2019","unstructured":"Matthew Fojtik, Ben Keller, Alicia Klinefelter, Nathaniel Pinckney, Stephen G. Tell, Brian Zimmer, Tezaswi Raja, Kevin Zhou, William J. Dally, and Brucek Khailany. 2019. A Fine-Grained GALS SoC with Pausible Adaptive Clocking in 16nm FinFET. In International Symposium on Asynchronous Circuits and Systems (ASYNC)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00012"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3037697.3037702"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3297858.3304014"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.81"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2017.7870257"},{"key":"e_1_3_2_1_29_1","unstructured":"Linley Gwennap. 2018. Graphcore Makes Big AI Splash. Microprocessor Report 618 (September 2018)."},{"key":"e_1_3_2_1_30_1","volume-title":"Proceedings of the International Symposium on Computer Architecture (ISCA).","author":"Han Song","unstructured":"Song Han, Xingyu Liu, Huizi Mao, Jing Pu, Ardavan Pedram, Mark A. Horowitz, and William J. Dally. 2016. EIE: Efficient Inference Engine on Compressed Deep Neural Network. In Proceedings of the International Symposium on Computer Architecture (ISCA)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/1555754.1555779"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00059"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_34_1","volume-title":"MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications. CoRR abs\/1704.04861","author":"Howard Andrew G.","year":"2017","unstructured":"Andrew G. Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, Tobias Weyand, Marco Andreetto, and Hartwig Adam. 2017. MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications. CoRR abs\/1704.04861 (2017). arXiv:1704.04861"},{"key":"e_1_3_2_1_35_1","volume-title":"Proceedings of the International Conference on Supercomputing (ICS).","author":"Huh Jaehyuk","unstructured":"Jaehyuk Huh, Changky Kim, Hazim Shafi, Lixin Zhang, Doug Burger, and Stephen W. Keckler. 2005. A NUCA Substrate for Flexible CMP Cache Sharing. In Proceedings of the International Conference on Supercomputing (ICS)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCPMT.2015.2511626"},{"key":"e_1_3_2_1_37_1","volume-title":"Proceedings of the International Symposium on Microarchitecture (MICRO).","author":"Jerger Natalie Enright","unstructured":"Natalie Enright Jerger, Ajaykumar Kannan, Zimo Li, and Gabriel H. Loh. 2014. NoC Architectures for Silicon Interposer Systems. In Proceedings of the International Symposium on Microarchitecture (MICRO)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654889"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"e_1_3_2_1_40_1","volume-title":"Proceedings of the International Symposium on Microarchitecture (MICRO).","author":"Kannan Ajaykumar","unstructured":"Ajaykumar Kannan, Natalie Enright Jerger, and Gabriel H. Loh. 2015. Enabling Interposer-based Disintegration of Multi-core Processors. In Proceedings of the International Symposium on Microarchitecture (MICRO)."},{"key":"e_1_3_2_1_41_1","volume-title":"Proceedings of the Conference on Neural Information Processing Systems (NeurIPS).","author":"Krizhevsky Alex","unstructured":"Alex Krizhevsky, Ilya Sutskever, and Geoffrey E. Hinton. 2012. Imagenet Classification with Deep Convolutional Neural Networks. In Proceedings of the Conference on Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173162.3173176"},{"key":"e_1_3_2_1_43_1","volume-title":"Proceedings of the International Solid State Circuits Conference (ISSCC).","author":"LeCun Yann","year":"2019","unstructured":"Yann LeCun. 2019. The Next Challenge in AI: Self-Supervised Learning. In Proceedings of the International Solid State Circuits Conference (ISSCC)."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1038\/nature14539"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/2818950.2818951"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"e_1_3_2_1_47_1","volume-title":"Proceedings of the International Conference on Machine Learning (ICML).","author":"Mirhoseini Azalia","year":"2017","unstructured":"Azalia Mirhoseini, Hieu Pham, Quoc V. Le, Benoit Steiner, Rasmus Larsen, Yuefeng Zhou, Naveen Kumar, Mohammad Norouzi, Samy Bengio, and Jeff Dean. 2017. Device Placement Optimization with Reinforcement Learning. In Proceedings of the International Conference on Machine Learning (ICML)."},{"key":"e_1_3_2_1_48_1","unstructured":"NVIDIA 2018. NVIDIA TensorRT: Programmable Inference Accelerator. https:\/\/developer.nvidia.com\/tensorrt."},{"key":"e_1_3_2_1_49_1","unstructured":"NVIDIA. 2019. NVIDIA Tesla Deep Learning Product Performance. https:\/\/developer.nvidia.com\/deep-learning-performance-training-inference."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2019.00042"},{"key":"e_1_3_2_1_51_1","volume-title":"Proceedings of the International Symposium on Computer Architecture (ISCA).","author":"Parashar Angshuman","unstructured":"Angshuman Parashar, Minsoo Rhu, Anurag Mukkara, Antonio Puglielli, Rangharajan Venkatesan, Brucek Khailany, Joel Emer, Stephen W. Keckler, and William J. Dally. 2017. SCNN: An Accelerator for Compressed-sparse Convolutional Neural Networks. In Proceedings of the International Symposium on Computer Architecture (ISCA)."},{"key":"e_1_3_2_1_52_1","volume-title":"Proceedings of the International Symposium on Computer Architecture (ISCA).","author":"Qadeer Wajahat","unstructured":"Wajahat Qadeer, Rehan Hameed, Ofer Shacham, Preethi Venkatesan, Christos Kozyrakis, and Mark A. Horowitz. 2013. Convolution Engine: Balancing Efficiency & Flexibility in Specialized Computing. In Proceedings of the International Symposium on Computer Architecture (ISCA)."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001165"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.91"},{"key":"e_1_3_2_1_55_1","unstructured":"Kirk Saban. 2012. Xilinx Stacked Silicon Interconnect Technology Delivers Breakthrough FPGA Capacity Bandwidth and Power Efficiency. http:\/\/www.xilinx.com\/support\/documentation\/white_papers\/wp380_Stacked_Silicon_Interconnect_Technology.pdf."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001139"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00069"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/3140659.3080221"},{"key":"e_1_3_2_1_59_1","unstructured":"Frans Sijstermans. 2018. The NVIDIA Deep Learning Accelerator. In Hot Chips."},{"key":"e_1_3_2_1_60_1","volume-title":"Very Deep Convolutional Networks for Large-scale Image Recognition. CoRR abs\/1408.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very Deep Convolutional Networks for Large-scale Image Recognition. CoRR abs\/1408.1556 (2014). arXiv:1408.1556"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2017.55"},{"key":"e_1_3_2_1_62_1","volume-title":"International Conference on Computer-Aided Design (ICCAD).","author":"Stow Dylan","unstructured":"Dylan Stow, Yuan Xie, Taniya Siddiqua, and Gabriel H. Loh. 2017. Cost-effective Design of Scalable High-performance Systems using Active and Passive Inter-posers. In International Conference on Computer-Aided Design (ICCAD)."},{"key":"e_1_3_2_1_63_1","volume-title":"Proceedings of the Conference on Neural Information Processing Systems (NeurIPS).","author":"Sutskever Ilya","year":"2014","unstructured":"Ilya Sutskever, Oriol Vinyals, and Quoc V Le. 2014. Sequence to Sequence Learning with Neural Networks. In Proceedings of the Conference on Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.308"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1145\/3123939.3123954"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1145\/3140659.3080214"},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2018.00058"},{"key":"e_1_3_2_1_69_1","volume-title":"WaveNet: A Generative Model for Raw Audio. CoRR abs\/1609.03499","author":"van den Oord A\u00e4ron","year":"2016","unstructured":"A\u00e4ron van den Oord, Sander Dieleman, Heiga Zen, Karen Simonyan, Oriol Vinyals, Alex Graves, Nal Kalchbrenner, Andrew W. Senior, and Koray Kavukcuoglu. 2016. WaveNet: A Generative Model for Raw Audio. CoRR abs\/1609.03499 (2016). arXiv:1609.03499"},{"key":"e_1_3_2_1_70_1","volume-title":"CoRR abs\/1706.03762","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017. Attention Is All You Need. CoRR abs\/1706.03762 (2017). arXiv:1706.03762"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080244"},{"key":"e_1_3_2_1_72_1","volume-title":"Proceedings of the International Solid State Circuits Conference (ISSCC).","author":"Wilson John M.","unstructured":"John M. Wilson, Walker J. Turner, John W. Poulton, Brian Zimmer, Xi Chen, Sudhir S. Kudva, Sanquan Song, Stephen G. Tell, Nikola Nedovic, Wenxu Zhao, Sunil R. Sudhakaran, C. Thomas Gray, and William J. Dally. 2018. A 1.17pJ\/b 25Gb\/s\/pin Ground-referenced Single-ended Serial Link for Off- and On-package Communication in 16nm CMOS Using a Process- and Temperature-adaptive Voltage Regulator. In Proceedings of the International Solid State Circuits Conference (ISSCC)."},{"key":"e_1_3_2_1_73_1","volume-title":"CoRR abs\/1809.04070","author":"Yang Xuan","year":"2018","unstructured":"Xuan Yang, Mingyu Gao, Jing Pu, Ankita Nayak, Qiaoyi Liu, Steven Bell, Jeff Setter, Kaidi Cao, Heonjae Ha, Christos Kozyrakis, and Mark Horowitz. 2018. DNN Dataflow Choice Is Overrated. CoRR abs\/1809.04070 (2018). arXiv:1809.04070"},{"key":"e_1_3_2_1_74_1","volume-title":"Proceedings of the International Symposium on Computer Architecture (ISCA).","author":"Yin Jieming","unstructured":"Jieming Yin, Zhifeng Lin, Onur Kayiran, Matthew Poremba, Muhammad Shoaib Bin Altaf, Natalie Enright Jerger, and Gabriel H. Loh. 2018. Modular Routing Design for Chiplet-based Systems. In Proceedings of the International Symposium on Computer Architecture (ISCA)."},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783723"},{"key":"e_1_3_2_1_76_1","doi-asserted-by":"publisher","DOI":"10.23919\/VLSIC.2019.8778056"}],"event":{"name":"MICRO '52: The 52nd Annual IEEE\/ACM International Symposium on Microarchitecture","location":"Columbus OH USA","acronym":"MICRO '52","sponsor":["SIGMICRO ACM Special Interest Group on Microarchitectural Research and Processing","IEEE CS"]},"container-title":["Proceedings of the 52nd Annual IEEE\/ACM International Symposium on Microarchitecture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3352460.3358302","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3352460.3358302","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,29]],"date-time":"2025-07-29T22:29:05Z","timestamp":1753828145000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3352460.3358302"}},"subtitle":["Scaling Deep-Learning Inference with Multi-Chip-Module-Based Architecture"],"short-title":[],"issued":{"date-parts":[[2019,10,12]]},"references-count":76,"alternative-id":["10.1145\/3352460.3358302","10.1145\/3352460"],"URL":"https:\/\/doi.org\/10.1145\/3352460.3358302","relation":{},"subject":[],"published":{"date-parts":[[2019,10,12]]},"assertion":[{"value":"2019-10-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}