{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:24:11Z","timestamp":1750220651336,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":37,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,3,22]],"date-time":"2021-03-22T00:00:00Z","timestamp":1616371200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100017055","name":"NSFC-Shandong Joint Fund","doi-asserted-by":"publisher","award":["U1806203"],"award-info":[{"award-number":["U1806203"]}],"id":[{"id":"10.13039\/100017055","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100018532","name":"Major Scientific and Technological Innovation Project of Shandong Province","doi-asserted-by":"publisher","award":["2019JZZY010449"],"award-info":[{"award-number":["2019JZZY010449"]}],"id":[{"id":"10.13039\/501100018532","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,3,22]]},"DOI":"10.1145\/3412841.3441893","type":"proceedings-article","created":{"date-parts":[[2021,4,23]],"date-time":"2021-04-23T05:09:16Z","timestamp":1619154556000},"page":"126-134","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Improving CNN performance on FPGA clusters through topology exploration"],"prefix":"10.1145","author":[{"given":"Ruihao","family":"Li","sequence":"first","affiliation":[{"name":"Shandong University, Qingdao, Shandong, China"}]},{"given":"Ke","family":"Liu","sequence":"additional","affiliation":[{"name":"Shandong Normal University, Jinan, Shandong, China"}]},{"given":"Xiaojun","family":"Cai","sequence":"additional","affiliation":[{"name":"Shandong University, Qingdao, Shandong, China"}]},{"given":"Mengying","family":"Zhao","sequence":"additional","affiliation":[{"name":"Shandong University, Qingdao, Shandong, China"}]},{"given":"Lizy K.","family":"John","sequence":"additional","affiliation":[{"name":"The University of Texas at Austin"}]},{"given":"Zhiping","family":"Jia","sequence":"additional","affiliation":[{"name":"Shandong University, Qingdao, Shandong, China"}]}],"member":"320","published-online":{"date-parts":[[2021,4,22]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASAP49362.2020.00018"},{"key":"e_1_3_2_1_2_1","volume-title":"Slide: In defense of smart algorithms over hardware acceleration for large-scale deep learning systems. arXiv preprint arXiv:1903.03129","author":"Chen Beidi","year":"2019","unstructured":"Beidi Chen, Tharun Medini, James Farwell, Sameh Gobriel, Charlie Tai, and Anshumali Shrivastava. Slide: In defense of smart algorithms over hardware acceleration for large-scale deep learning systems. arXiv preprint arXiv:1903.03129, 2019."},{"key":"e_1_3_2_1_3_1","first-page":"578","volume-title":"13th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} 18)","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, Lianmin Zheng, Eddie Yan, Haichen Shen, Meghan Cowan, Leyuan Wang, Yuwei Hu, Luis Ceze, et al. {TVM}: An automated end-to-end optimizing compiler for deep learning. In 13th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} 18), pages 578--594, 2018."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001177"},{"key":"e_1_3_2_1_5_1","volume-title":"Eyeriss v2: A flexible accelerator for emerging deep neural networks on mobile devices","author":"Chen Yu-Hsin","year":"2019","unstructured":"Yu-Hsin Chen, Tien-Ju Yang, Joel Emer, and Vivienne Sze. Eyeriss v2: A flexible accelerator for emerging deep neural networks on mobile devices. IEEE Journal on Emerging and Selected Topics in Circuits and Systems, 9(2):292--308, 2019."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.58"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00012"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/FCCM.2018.00021"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.5555\/3122009.3242044"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"Weiwen Jiang Edwin H-M Sha Xinyi Zhang Lei Yang Qingfeng Zhuge Yiyu Shi and Jingtong Hu. Achieving super-linear speedup across multi-fpga for real-time dnn inference. ACM Transactions on Embedded Computing Systems (TECS) 18(5s):1--23 2019.","DOI":"10.1145\/3358192"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2018.2857098"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2020.3012863"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3316781.3317757"},{"key":"e_1_3_2_1_15_1","first-page":"1097","volume-title":"Advances in neural information processing systems","author":"Krizhevsky Alex","year":"2012","unstructured":"Alex Krizhevsky, Ilya Sutskever, and Geoffrey E Hinton. Imagenet classification with deep convolutional neural networks. In Advances in neural information processing systems, pages 1097--1105. MIT Press, 2012."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/5.726791"},{"key":"e_1_3_2_1_17_1","first-page":"319","volume-title":"The 2020 ACM\/SIGDA International Symposium on Field-Programmable Gate Arrays","author":"Li Ruihao","unstructured":"Ruihao Li, Ke Liu, Mengying Zhao, Zhaoyan Shen, Xiaojun Cai, and Zhiping Jia. Maximizing cnn throughput on fpga clusters. In The 2020 ACM\/SIGDA International Symposium on Field-Programmable Gate Arrays, page 319. ACM, 2020."},{"key":"e_1_3_2_1_18_1","volume-title":"Vta: An open hardware-software stack for deep learning. arXiv preprint arXiv:1807.04188","author":"Moreau Thierry","year":"2018","unstructured":"Thierry Moreau, Tianqi Chen, Ziheng Jiang, Luis Ceze, Carlos Guestrin, and Arvind Krishnamurthy. Vta: An open hardware-software stack for deep learning. arXiv preprint arXiv:1807.04188, 2018."},{"key":"e_1_3_2_1_19_1","volume-title":"Deep learning training in facebook data centers: Design of scale-up and scale-out systems. arXiv preprint arXiv:2003.09518","author":"Naumov Maxim","year":"2020","unstructured":"Maxim Naumov, John Kim, Dheevatsa Mudigere, Srinivas Sridharan, Xiaodong Wang, Whitney Zhao, Serhat Yilmaz, Changkyu Kim, Hector Yuen, Mustafa Ozdal, et al. Deep learning training in facebook data centers: Design of scale-up and scale-out systems. arXiv preprint arXiv:2003.09518, 2020."},{"key":"e_1_3_2_1_20_1","unstructured":"Joseph Redmon. Darknet: Open source neural networks in c. http:\/\/pjreddie.com\/darknet\/ 2013--2016."},{"key":"e_1_3_2_1_21_1","volume-title":"Scale-sim: Systolic cnn accelerator simulator. arXiv preprint arXiv:1811.02883","author":"Samajdar Ananda","year":"2018","unstructured":"Ananda Samajdar, Yuhao Zhu, Paul Whatmough, Matthew Mattina, and Tushar Krishna. Scale-sim: Systolic cnn accelerator simulator. arXiv preprint arXiv:1811.02883, 2018."},{"key":"e_1_3_2_1_22_1","unstructured":"Xilinx Inc.:\"LVDS Source Synchronous 7:1 Serialization and Deserialization Using Clock Multiplication\". Xapp585."},{"key":"e_1_3_2_1_23_1","article-title":"Efficient allocation of multi-kernel applications on multi-fpga platforms","author":"Shan Junnan","year":"2020","unstructured":"Junnan Shan, Mihai T Lazarescu, Jordi Cortadella, Luciano Lavagno, and Mario R Casu. Cnn-on-aws: Efficient allocation of multi-kernel applications on multi-fpga platforms. IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems, 2020.","journal-title":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3140659.3080221"},{"key":"e_1_3_2_1_25_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556, 2014."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.5555\/3298023.3298188"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3020078.3021744"},{"key":"e_1_3_2_1_28_1","unstructured":"Xilinx Inc.:\"Aurora 64B\/66B v11.1 Product Guide.\". pg074."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/FCCM.2016.22"},{"key":"e_1_3_2_1_30_1","volume-title":"Toolflows for mapping convolutional neural networks on fpgas: A survey and future directions. ACM Computing Surveys (CSUR), 51(3):56","author":"Venieris Stylianos I","year":"2018","unstructured":"Stylianos I Venieris, Alexandros Kouris, and Christos-Savvas Bouganis. Toolflows for mapping convolutional neural networks on fpgas: A survey and future directions. ACM Computing Surveys (CSUR), 51(3):56, 2018."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3316781.3317875"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASAP49362.2020.00017"},{"key":"e_1_3_2_1_33_1","unstructured":"Xilinx. Accelerating dnns with xilinx alveo accelerator cards."},{"volume-title":"Xilinx products overview. https:\/\/www.xilinx.com\/products\/silicon-devices\/fpga.html","year":"2020","key":"e_1_3_2_1_34_1","unstructured":"Xilinx. Xilinx products overview. https:\/\/www.xilinx.com\/products\/silicon-devices\/fpga.html, 2020."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/2684746.2689060"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/2934583.2934644"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00051"}],"event":{"name":"SAC '21: The 36th ACM\/SIGAPP Symposium on Applied Computing","sponsor":["SIGAPP ACM Special Interest Group on Applied Computing"],"location":"Virtual Event Republic of Korea","acronym":"SAC '21"},"container-title":["Proceedings of the 36th Annual ACM Symposium on Applied Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3412841.3441893","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3412841.3441893","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T22:02:23Z","timestamp":1750197743000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3412841.3441893"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,3,22]]},"references-count":37,"alternative-id":["10.1145\/3412841.3441893","10.1145\/3412841"],"URL":"https:\/\/doi.org\/10.1145\/3412841.3441893","relation":{},"subject":[],"published":{"date-parts":[[2021,3,22]]},"assertion":[{"value":"2021-04-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}