{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T13:56:24Z","timestamp":1780494984146,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,8,29]],"date-time":"2022-08-29T00:00:00Z","timestamp":1661731200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,8,29]]},"DOI":"10.1145\/3545008.3545086","type":"proceedings-article","created":{"date-parts":[[2023,1,15]],"date-time":"2023-01-15T01:04:08Z","timestamp":1673744648000},"page":"1-10","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["DSSA: Dual-Side Sparse Systolic Array Architecture for Accelerating Convolutional Neural Network Training"],"prefix":"10.1145","author":[{"given":"Zhengbo","family":"Chen","sequence":"first","affiliation":[{"name":"Information Engineering University, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Qi","family":"Yu","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Mathematical Engineering and Advanced Computing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Fang","family":"Zheng","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Mathematical Engineering and Advanced Computing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Feng","family":"Guo","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Mathematical Engineering and Advanced Computing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zuoning","family":"Chen","sequence":"additional","affiliation":[{"name":"Chinese Academy of Engineering, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2023,1,13]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"L.\u00a0J. Ba and R. Caruana. 2013. Do deep nets really need to be deep?arXiv preprint arXiv:1312.6184(2013)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"crossref","unstructured":"N. Bell and M. Garland. 2009. Implementing sparse matrix-vector multiplication on throughput-oriented processors. In SC. 1\u201311.","DOI":"10.1145\/1654059.1654078"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/JETCAS.2019.2910232"},{"key":"e_1_3_2_1_4_1","volume-title":"ARTICLE","author":"Collobert R.","year":"2011","unstructured":"R. Collobert, J. Weston, 2011. Natural language processing (almost) from scratch. Journal of machine learning research 12, ARTICLE (2011), 2493\u20132537."},{"key":"e_1_3_2_1_5_1","unstructured":"M. Denil B. Shakibi 2013. Predicting parameters in deep learning. arXiv preprint arXiv:1306.0543(2013)."},{"key":"e_1_3_2_1_6_1","volume-title":"Gemmini: An agile systolic array generator enabling systematic evaluations of deep-learning architectures. arXiv preprint arXiv:1911.09925 3","author":"Genc H.","year":"2019","unstructured":"H. Genc, A. Haj-Ali, 2019. Gemmini: An agile systolic array generator enabling systematic evaluations of deep-learning architectures. arXiv preprint arXiv:1911.09925 3 (2019), 25."},{"key":"e_1_3_2_1_7_1","unstructured":"Y. Gong L. Liu 2014. Compressing deep convolutional networks using vector quantization. arXiv preprint arXiv:1412.6115(2014)."},{"key":"e_1_3_2_1_8_1","unstructured":"S. Gupta A. Agrawal 2015. Deep learning with limited numerical precision. In ICML. 1737\u20131746."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001163"},{"key":"e_1_3_2_1_10_1","unstructured":"S. Han H. Mao 2015. Deep compression: Compressing deep neural networks with pruning trained quantization and huffman coding. arXiv preprint arXiv:1510.00149(2015)."},{"key":"e_1_3_2_1_11_1","unstructured":"B. Hassibi and D.\u00a0G. Stork. 1993. Second order derivatives for network pruning: Optimal brain surgeon. Morgan Kaufmann."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","unstructured":"K. He X. Zhang 2016. Deep residual learning for image recognition. In CVPR. 770\u2013778.","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3392717.3392751"},{"key":"e_1_3_2_1_14_1","volume-title":"Mobilenets: Efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861(2017).","author":"Howard G.","year":"2017","unstructured":"A.\u00a0G. Howard, M. Zhu, 2017. Mobilenets: Efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861(2017)."},{"key":"e_1_3_2_1_15_1","unstructured":"H. Hu R. Peng Y.-Wing Tai 2016. Network trimming: A data-driven neuron pruning approach towards efficient deep architectures. arXiv preprint arXiv:1607.03250(2016)."},{"key":"e_1_3_2_1_16_1","unstructured":"F.\u00a0N. Iandola S. Han 2016. SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and < 0.5 MB model size. arXiv preprint arXiv:1602.07360(2016)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"crossref","unstructured":"M. Jaderberg A. Vedaldi 2014. Speeding up convolutional neural networks with low rank expansions. arXiv preprint arXiv:1405.3866(2014).","DOI":"10.5244\/C.28.88"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654889"},{"key":"e_1_3_2_1_19_1","unstructured":"N.\u00a0P. Jouppi C. Young 2017. In-datacenter performance analysis of a tensor processing unit. In ISCA. 1\u201312."},{"key":"e_1_3_2_1_20_1","unstructured":"A. Korattikara V. Rathod 2015. Bayesian dark knowledge. arXiv preprint arXiv:1506.04416(2015)."},{"key":"e_1_3_2_1_21_1","volume-title":"Imagenet classification with deep convolutional neural networks. Advances in neural information processing systems 25","author":"Krizhevsky A.","year":"2012","unstructured":"A. Krizhevsky, I. Sutskever, 2012. Imagenet classification with deep convolutional neural networks. Advances in neural information processing systems 25 (2012), 1097\u20131105."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"crossref","unstructured":"HT Kung B. McDanel 2019. Packing sparse convolutional neural networks for efficient systolic array implementations: Column combining under joint optimization. In ASPLOS. 821\u2013834.","DOI":"10.1145\/3297858.3304028"},{"key":"e_1_3_2_1_23_1","volume-title":"Why systolic architectures?Computer 15, 01","author":"Kung Hsiang-Tsung","year":"1982","unstructured":"Hsiang-Tsung Kung. 1982. Why systolic architectures?Computer 15, 01 (1982), 37\u201346."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/5.726791"},{"key":"e_1_3_2_1_25_1","unstructured":"Y. LeCun J.\u00a0S. Denker 1990. Optimal brain damage. In Advances in neural information processing systems. 598\u2013605."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"e_1_3_2_1_27_1","unstructured":"Zhi. Liu P.\u00a0N. Whatmough 2020. Sparse systolic tensor array for efficient CNN hardware acceleration. arXiv preprint arXiv:2009.02381(2020)."},{"key":"e_1_3_2_1_28_1","unstructured":"A. Mishra J.\u00a0A. Latorre 2021. Accelerating sparse deep neural networks. arXiv preprint arXiv:2104.08378(2021)."},{"key":"e_1_3_2_1_29_1","unstructured":"Nvidia. 2020. NVIDIA A100 Tensor Core GPU Architecture. (2020)."},{"key":"e_1_3_2_1_30_1","volume-title":"Outerspace: An outer product based sparse matrix multiplication accelerator. In HPCA. 724\u2013736.","author":"Pal S.","year":"2018","unstructured":"S. Pal, J. Beaumont, 2018. Outerspace: An outer product based sparse matrix multiplication accelerator. In HPCA. 724\u2013736."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080254"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"crossref","unstructured":"J. Peltenburg S. Ren 2016. Maximizing systolic array efficiency to accelerate the PairHMM forward algorithm. In BIBM. 758\u2013762.","DOI":"10.1109\/BIBM.2016.7822616"},{"key":"e_1_3_2_1_33_1","volume-title":"Sigma: A sparse and irregular gemm accelerator with flexible interconnects for dnn training. In HPCA. 58\u201370.","author":"Qin E.","year":"2020","unstructured":"E. Qin, A. Samajdar, 2020. Sigma: A sparse and irregular gemm accelerator with flexible interconnects for dnn training. In HPCA. 58\u201370."},{"key":"e_1_3_2_1_34_1","unstructured":"J. Redmon and A. Farhadi. 2018. Yolov3: An incremental improvement. arXiv preprint arXiv:1804.02767(2018)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"crossref","unstructured":"F. Shi H. Li 2018. Sparse Winograd Convolutional neural networks on small-scale systolic arrays. arXiv preprint arXiv:1810.01973(2018).","DOI":"10.1145\/3289602.3293939"},{"key":"e_1_3_2_1_36_1","unstructured":"K. Simonyan and A. Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556(2014)."},{"key":"e_1_3_2_1_37_1","volume-title":"Matraptor: A sparse-sparse matrix multiplication accelerator based on row-wise product. In MICRO. 766\u2013780.","author":"Srivastava N.","year":"2020","unstructured":"N. Srivastava, H. Jin, 2020. Matraptor: A sparse-sparse matrix multiplication accelerator based on row-wise product. In MICRO. 766\u2013780."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"crossref","unstructured":"Y. Wang C. Zhang 2021. Dual-side Sparse Tensor Core. arXiv preprint arXiv:2105.09564(2021).","DOI":"10.1109\/ISCA52012.2021.00088"},{"key":"e_1_3_2_1_39_1","volume-title":"Learning structured sparsity in deep neural networks. Advances in neural information processing systems 29","author":"Wen W.","year":"2016","unstructured":"W. Wen, C. Wu, 2016. Learning structured sparsity in deep neural networks. Advances in neural information processing systems 29 (2016), 2074\u20132082."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"crossref","unstructured":"Z. Yang L. Wang 2018. Systolic array based accelerator and algorithm mapping for deep learning algorithms. In NPC. 153\u2013158.","DOI":"10.1007\/978-3-030-05677-3_16"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"crossref","unstructured":"G. Zhang N. Attaluri 2021. Gamma: leveraging Gustavson\u2019s algorithm to accelerate sparse matrix multiplication. In ASPLOS. 687\u2013701.","DOI":"10.1145\/3445814.3446702"},{"key":"e_1_3_2_1_42_1","volume-title":"Sparch: Efficient architecture for sparse matrix multiplication. In HPCA. 261\u2013274.","author":"Zhang Z.","year":"2020","unstructured":"Z. Zhang, H. Wang, 2020. Sparch: Efficient architecture for sparse matrix multiplication. In HPCA. 261\u2013274."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"crossref","unstructured":"M. Zhu and Y. Xie. 2020. Taming unstructured sparsity on GPUs via latency-aware optimization. In DAC. 1\u20136.","DOI":"10.1109\/DAC18072.2020.9218644"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"crossref","unstructured":"M. Zhu T. Zhang 2019. Sparse tensor core: Algorithm and hardware co-design for vector-wise sparse neural networks on modern gpus. In MICRO. 359\u2013371.","DOI":"10.1145\/3352460.3358269"}],"event":{"name":"ICPP '22: 51st International Conference on Parallel Processing","location":"Bordeaux France","acronym":"ICPP '22"},"container-title":["Proceedings of the 51st International Conference on Parallel Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3545008.3545086","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3545008.3545086","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:02:44Z","timestamp":1750186964000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3545008.3545086"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,29]]},"references-count":44,"alternative-id":["10.1145\/3545008.3545086","10.1145\/3545008"],"URL":"https:\/\/doi.org\/10.1145\/3545008.3545086","relation":{},"subject":[],"published":{"date-parts":[[2022,8,29]]},"assertion":[{"value":"2023-01-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}