{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:20:07Z","timestamp":1750220407856,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":28,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,10,8]],"date-time":"2021-10-08T00:00:00Z","timestamp":1633651200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"MCIU\/AEI\/FEDER,UE","award":["RTI2018-098156-B-C5"],"award-info":[{"award-number":["RTI2018-098156-B-C5"]}]},{"name":"NSF OAC","award":["1909900"],"award-info":[{"award-number":["1909900"]}]},{"name":"Fundacion Seneca","award":["20749\/FPI\/18"],"award-info":[{"award-number":["20749\/FPI\/18"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,10,14]]},"DOI":"10.1145\/3479876.3481602","type":"proceedings-article","created":{"date-parts":[[2021,10,5]],"date-time":"2021-10-05T19:14:45Z","timestamp":1633461285000},"page":"1-8","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["A novel network fabric for efficient spatio-temporal reduction in flexible DNN accelerators"],"prefix":"10.1145","author":[{"given":"Francisco","family":"Mu\u00f1oz-Mart\u00ednez","sequence":"first","affiliation":[{"name":"Universidad de Murcia (Spain)"}]},{"given":"Jos\u00e9 L.","family":"Abell\u00e1n","sequence":"additional","affiliation":[{"name":"Universidad Cat\u00f3lica de Murcia (Spain)"}]},{"given":"Manuel E.","family":"Acacio","sequence":"additional","affiliation":[{"name":"Universidad de Murcia (Spain)"}]},{"given":"Tushar","family":"Krishna","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology (USA)"}]}],"member":"320","published-online":{"date-parts":[[2021,10,8]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"[n.d.]. Bluespec System Verilog (BSV). http:\/\/wiki.bluespec.com\/.  [n.d.]. Bluespec System Verilog (BSV). http:\/\/wiki.bluespec.com\/."},{"key":"e_1_3_2_1_2_1","unstructured":"[n.d.]. MAERI code v1. https:\/\/github.com\/hyoukjun\/MAERI.  [n.d.]. MAERI code v1. https:\/\/github.com\/hyoukjun\/MAERI."},{"key":"e_1_3_2_1_3_1","unstructured":"[n.d.]. SIGMA code v1. https:\/\/github.com\/georgia-tech-synergy-lab\/SIGMA.  [n.d.]. SIGMA code v1. https:\/\/github.com\/georgia-tech-synergy-lab\/SIGMA."},{"key":"e_1_3_2_1_4_1","article-title":"Origami: A 803-GOp\/s\/W Convolutional Network Accelerator","author":"Cavigelli Lukas","year":"2016","unstructured":"Lukas Cavigelli and Luca Benini . 2016 . Origami: A 803-GOp\/s\/W Convolutional Network Accelerator . IEEE Transactions on Circuits and Systems for Video Technology ( July 2016), 2461--2475. Lukas Cavigelli and Luca Benini. 2016. Origami: A 803-GOp\/s\/W Convolutional Network Accelerator. IEEE Transactions on Circuits and Systems for Video Technology (July 2016), 2461--2475.","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3313231.3352376"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3313231.3352376"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2017.2749425"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2017.54"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/JETCAS.2019.2910232"},{"key":"e_1_3_2_1_10_1","volume-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. arXiv preprint arXiv","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin , Ming-Wei Chang , Kenton Lee , and Kristina Toutanova . 2019 . BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. arXiv preprint arXiv : 1810.04805v2 (2019) (May 2019). Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. arXiv preprint arXiv: 1810.04805v2 (2019) (May 2019)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2750389"},{"key":"e_1_3_2_1_12_1","unstructured":"Vijay Janapa Reddi etal 2019. MLPerf Inference Benchmark. ArXiv: 1911.02549 (2019) (Dec. 2019).  Vijay Janapa Reddi et al. 2019. MLPerf Inference Benchmark. ArXiv: 1911.02549 (2019) (Dec. 2019)."},{"key":"e_1_3_2_1_13_1","volume-title":"Deep Residual Learning for Image Recognition. arXiv preprint arXiv: 1512.03385v1","author":"He Kaiming","year":"2015","unstructured":"Kaiming He , Xiangyu Zhang , Shaoqing Ren , and Jian Sun . 2015. Deep Residual Learning for Image Recognition. arXiv preprint arXiv: 1512.03385v1 ( 2015 ). Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. 2015. Deep Residual Learning for Image Recognition. arXiv preprint arXiv: 1512.03385v1 (2015)."},{"key":"e_1_3_2_1_14_1","volume-title":"MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications. arXiv preprint arXiv: 1704.04861 (2017) (April","author":"Howard Andrew G.","year":"2017","unstructured":"Andrew G. Howard , Menglong Zhu , Bo Chen , Dmitry Kalenichenko , Weijun Wang , Tobias Weyand , Marco Andreetto , and Hartwig Adam . 2017. MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications. arXiv preprint arXiv: 1704.04861 (2017) (April 2017 ). Andrew G. Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, Tobias Weyand, Marco Andreetto, and Hartwig Adam. 2017. MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications. arXiv preprint arXiv: 1704.04861 (2017) (April 2017)."},{"key":"e_1_3_2_1_15_1","volume-title":"SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and &lt","author":"Iandola Forrest N.","year":"2016","unstructured":"Forrest N. Iandola , Song Han , Matthew W. Moskewicz , Khalid Ashraf , William J. Dally , and Kurt Keutzer . 2016. SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and &lt ; 0.5MB model size. arXiv preprint: 1611.10012 ( 2016 ). Forrest N. Iandola, Song Han, Matthew W. Moskewicz, Khalid Ashraf, William J. Dally, and Kurt Keutzer. 2016. SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and &lt; 0.5MB model size. arXiv preprint: 1611.10012 (2016)."},{"key":"e_1_3_2_1_16_1","volume-title":"Jouppi et al","author":"Norman","year":"2017","unstructured":"Norman P. Jouppi et al . 2017 . In-Datacenter Performance Analysis of a Tensor Processing Unit. In 44th Int'l Symp. on Computer Architecture (ISCA) . 1--12. Norman P. Jouppi et al. 2017. In-Datacenter Performance Analysis of a Tensor Processing Unit. In 44th Int'l Symp. on Computer Architecture (ISCA). 1--12."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"crossref","unstructured":"Tushar Krishna Hyoukjun Kwon Angshuman Parashar Michael Pellauer and Ananda Samajdar. 2020. Data Orchestration in Deep Learning Accelerators. 1--164.  Tushar Krishna Hyoukjun Kwon Angshuman Parashar Michael Pellauer and Ananda Samajdar. 2020. Data Orchestration in Deep Learning Accelerators. 1--164.","DOI":"10.2200\/S01015ED1V01Y202005CAC052"},{"key":"e_1_3_2_1_18_1","volume-title":"ImageNet Classification with Deep Convolutional Neural Networks. International Conf. on Neural Information Processing Systems (NIPS) (Dec.","author":"Krizhevsky Alex","year":"2012","unstructured":"Alex Krizhevsky , Ilya Sutskever , and Geoffrey E. Hinton . 2012 . ImageNet Classification with Deep Convolutional Neural Networks. International Conf. on Neural Information Processing Systems (NIPS) (Dec. 2012 ), 1106--1114. Alex Krizhevsky, Ilya Sutskever, and Geoffrey E. Hinton. 2012. ImageNet Classification with Deep Convolutional Neural Networks. International Conf. on Neural Information Processing Systems (NIPS) (Dec. 2012), 1106--1114."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358252"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3130218.3130230"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173162.3173176"},{"key":"e_1_3_2_1_22_1","volume-title":"Berg","author":"Liu Wei","year":"2015","unstructured":"Wei Liu , Dragomir Anguelov , Dumitru Erhan , Christian Szegedy , Scott Reed , Cheng-Yang Fu , and Alexander C . Berg . 2015 . SSD : Single Shot MultiBox Detector . arXiv preprint arXiv: 1512.02325v5 (2015) (Dec. 2015). Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, Cheng-Yang Fu, and Alexander C. Berg. 2015. SSD: Single Shot MultiBox Detector. arXiv preprint arXiv: 1512.02325v5 (2015) (Dec. 2015)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2016.2574353"},{"key":"e_1_3_2_1_24_1","volume-title":"STONNE: Enabling Cycle-Level Microarchitectural Simulation for DNN Inference Accelerators","author":"Matr\u00ednez Francisco Mu\u00f1oz","year":"2021","unstructured":"Francisco Mu\u00f1oz Matr\u00ednez , Jos\u00e9 L. Abell\u00e1n , Manuel E. Acacio , and Tushar Krishna . 2021 . STONNE: Enabling Cycle-Level Microarchitectural Simulation for DNN Inference Accelerators . IEEE Computer Architecture Letters 01 (July 2021). Francisco Mu\u00f1oz Matr\u00ednez, Jos\u00e9 L. Abell\u00e1n, Manuel E. Acacio, and Tushar Krishna. 2021. STONNE: Enabling Cycle-Level Microarchitectural Simulation for DNN Inference Accelerators. IEEE Computer Architecture Letters 01 (July 2021)."},{"key":"e_1_3_2_1_25_1","volume-title":"SIGMA: A Sparse and Irregular GEMM Accelerator with Flexible Interconnects for DNN Training. IEEE International Symposium on High-Performance Computer Architecture (HPCA) (March","author":"Qin Eric","year":"2020","unstructured":"Eric Qin , Ananda Samajdar , Hyoukjun Kwon , Vineet Nadella , Sudarshan Srinivasan , Dipankar Das , Bharat Kaul , and Tushar Krishna . 2020 . SIGMA: A Sparse and Irregular GEMM Accelerator with Flexible Interconnects for DNN Training. IEEE International Symposium on High-Performance Computer Architecture (HPCA) (March 2020). Eric Qin, Ananda Samajdar, Hyoukjun Kwon, Vineet Nadella, Sudarshan Srinivasan, Dipankar Das, Bharat Kaul, and Tushar Krishna. 2020. SIGMA: A Sparse and Irregular GEMM Accelerator with Flexible Interconnects for DNN Training. IEEE International Symposium on High-Performance Computer Architecture (HPCA) (March 2020)."},{"key":"e_1_3_2_1_26_1","volume-title":"Very Deep Convolutional Networks for Large-Scale Image Recognition. arXiv preprint: 1409.1556v6","author":"Simonyan Karen","year":"2016","unstructured":"Karen Simonyan and Andrew Zisserman . 2016. Very Deep Convolutional Networks for Large-Scale Image Recognition. arXiv preprint: 1409.1556v6 ( 2016 ). Karen Simonyan and Andrew Zisserman. 2016. Very Deep Convolutional Networks for Large-Scale Image Recognition. arXiv preprint: 1409.1556v6 (2016)."},{"key":"e_1_3_2_1_27_1","volume-title":"Efficient Processing of Deep Neural Networks: A Tutorial and Survey. arXiv preprint arXiv: 1703.09039v2 (2017) (Aug","author":"Sze Vivienne","year":"2017","unstructured":"Vivienne Sze , Yu-Hsin Chen , Tien-Ju Yang , and Joel Emer . 2017. Efficient Processing of Deep Neural Networks: A Tutorial and Survey. arXiv preprint arXiv: 1703.09039v2 (2017) (Aug . 2017 ). Vivienne Sze, Yu-Hsin Chen, Tien-Ju Yang, and Joel Emer. 2017. Efficient Processing of Deep Neural Networks: A Tutorial and Survey. arXiv preprint arXiv: 1703.09039v2 (2017) (Aug. 2017)."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2019.00040"}],"event":{"name":"NOCS '21: International Symposium on Networks-on-Chip","sponsor":["SIGBED ACM Special Interest Group on Embedded Systems","SIGDA ACM Special Interest Group on Design Automation","SIGARCH ACM Special Interest Group on Computer Architecture","IEEE CAS","IEEE Council on Electronic Design Automation (CEDA)"],"location":"Virtual Event","acronym":"NOCS '21"},"container-title":["Proceedings of the 15th IEEE\/ACM International Symposium on Networks-on-Chip"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3479876.3481602","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3479876.3481602","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:19:01Z","timestamp":1750191541000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3479876.3481602"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,10,8]]},"references-count":28,"alternative-id":["10.1145\/3479876.3481602","10.1145\/3479876"],"URL":"https:\/\/doi.org\/10.1145\/3479876.3481602","relation":{},"subject":[],"published":{"date-parts":[[2021,10,8]]},"assertion":[{"value":"2021-10-08","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}