{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:00:28Z","timestamp":1750309228696,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,1,20]],"date-time":"2025-01-20T00:00:00Z","timestamp":1737331200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,1,20]]},"DOI":"10.1145\/3658617.3697548","type":"proceedings-article","created":{"date-parts":[[2025,3,4]],"date-time":"2025-03-04T14:32:21Z","timestamp":1741098741000},"page":"23-29","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["A Computation and Energy Efficient Hardware Architecture for SSL Acceleration"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-7202-1681","authenticated-orcid":false,"given":"Huidong","family":"Ji","sequence":"first","affiliation":[{"name":"Fudan Univ., Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-4221-3143","authenticated-orcid":false,"given":"Sheng","family":"Li","sequence":"additional","affiliation":[{"name":"Univ. of Pittsburgh, Pittsburgh, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-4050-6415","authenticated-orcid":false,"given":"Yue","family":"Cao","sequence":"additional","affiliation":[{"name":"Fudan Univ., Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8663-7760","authenticated-orcid":false,"given":"Chen","family":"Ding","sequence":"additional","affiliation":[{"name":"Guangdong Institute of Intelligence Science and Technology, Zhuhai, Guangdong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6192-558X","authenticated-orcid":false,"given":"Jiawei","family":"Xu","sequence":"additional","affiliation":[{"name":"Royal Inst. of Tech., Stockholm, Sweden"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1280-0334","authenticated-orcid":false,"given":"Qitao","family":"Tan","sequence":"additional","affiliation":[{"name":"Univ. of Georgia, Athens, Georgia, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3808-4599","authenticated-orcid":false,"given":"Jun","family":"Liu","sequence":"additional","affiliation":[{"name":"Northeastern Univ., Boston, Massachusetts, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1927-8606","authenticated-orcid":false,"given":"Ao","family":"Li","sequence":"additional","affiliation":[{"name":"Univ. of Arizona, Tucson, Arizona, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3385-2053","authenticated-orcid":false,"given":"Xulong","family":"Tang","sequence":"additional","affiliation":[{"name":"Univ. of Pittsburgh, Pittsburgh, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9588-0239","authenticated-orcid":false,"given":"Lirong","family":"Zheng","sequence":"additional","affiliation":[{"name":"Fudan Univ., Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9844-992X","authenticated-orcid":false,"given":"Geng","family":"Yuan","sequence":"additional","affiliation":[{"name":"Univ. of Georgia, Athens, Georgia, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8546-1329","authenticated-orcid":false,"given":"Zhuo","family":"Zou","sequence":"additional","affiliation":[{"name":"Fudan Univ., Shanghai, China"}]}],"member":"320","published-online":{"date-parts":[[2025,3,4]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2020. DSP48E Architecture. Vivado Design Suite Reference Guide: Model-Based DSP Design Using System Generator (UG958)."},{"key":"e_1_3_2_1_2_1","volume-title":"European Conference on Computer Vision. Springer, 523--538","author":"Addepalli Sravanti","year":"2022","unstructured":"Sravanti Addepalli, Kaushal Bhogale, Priyam Dey, and R Venkatesh Babu. 2022. Towards efficient and effective self-supervised learning of visual representations. In European Conference on Computer Vision. Springer, 523--538."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3570928"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.micpro.2022.104666"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSI.2020.3048260"},{"key":"e_1_3_2_1_6_1","volume-title":"Understanding the potential of fpga-based spatial acceleration for large language model inference. arXiv preprint arXiv:2312.15159","author":"Chen Hongzheng","year":"2023","unstructured":"Hongzheng Chen, Jiahao Zhang, Yixiao Du, Shaojie Xiang, Zichao Yue, Niansong Zhang, Yaohui Cai, and Zhiru Zhang. 2023. Understanding the potential of fpga-based spatial acceleration for large language model inference. arXiv preprint arXiv:2312.15159 (2023)."},{"key":"e_1_3_2_1_7_1","volume-title":"International conference on machine learning. PMLR, 1597--1607","author":"Chen Ting","year":"2020","unstructured":"Ting Chen, Simon Kornblith, Mohammad Norouzi, and Geoffrey Hinton. 2020. A simple framework for contrastive learning of visual representations. In International conference on machine learning. PMLR, 1597--1607."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01549"},{"key":"e_1_3_2_1_9_1","volume-title":"An Analysis of Single-Layer Networks in Unsupervised Feature Learning. In International Conference on Artificial Intelligence and Statistics. https:\/\/api.semanticscholar.org\/CorpusID:308212","author":"Coates Adam","year":"2011","unstructured":"Adam Coates, A. Ng, and Honglak Lee. 2011. An Analysis of Single-Layer Networks in Unsupervised Feature Learning. In International Conference on Artificial Intelligence and Statistics. https:\/\/api.semanticscholar.org\/CorpusID:308212"},{"key":"e_1_3_2_1_10_1","volume-title":"Coatnet: Marrying convolution and attention for all data sizes. Advances in neural information processing systems 34","author":"Dai Zihang","year":"2021","unstructured":"Zihang Dai, Hanxiao Liu, Quoc V Le, and Mingxing Tan. 2021. Coatnet: Marrying convolution and attention for all data sizes. Advances in neural information processing systems 34 (2021), 3965--3977."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2022.3144017"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3495532"},{"key":"e_1_3_2_1_14_1","volume-title":"Zhaohan Guo, Mohammad Gheshlaghi Azar, et al.","author":"Grill Jean-Bastien","year":"2020","unstructured":"Jean-Bastien Grill, Florian Strub, Florent Altch\u00e9, Corentin Tallec, Pierre Richemond, Elena Buchatskaya, Carl Doersch, Bernardo Avila Pires, Zhaohan Guo, Mohammad Gheshlaghi Azar, et al. 2020. Bootstrap your own latent-a new approach to self-supervised learning. Advances in neural information processing systems 33 (2020), 21271--21284."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_16_1","volume-title":"2021 IEEE International Symposium on High-Performance Computer Architecture (HPCA). IEEE, 84--96","author":"Hojabr Reza","year":"2021","unstructured":"Reza Hojabr, Ali Sedaghati, Amirali Sharifian, Ahmad Khonsari, and Arrvindh Shriraman. 2021. Spaghetti: Streaming accelerators for highly sparse gemm on fpgas. In 2021 IEEE International Symposium on High-Performance Computer Architecture (HPCA). IEEE, 84--96."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSI.2021.3108762"},{"key":"e_1_3_2_1_18_1","volume-title":"Densely Connected Convolutional Networks. 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Huang Gao","year":"2016","unstructured":"Gao Huang, Zhuang Liu, and Kilian Q. Weinberger. 2016. Densely Connected Convolutional Networks. 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2016), 2261--2269. https:\/\/api.semanticscholar.org\/CorpusID:9433631"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSI.2023.3258411"},{"key":"e_1_3_2_1_20_1","volume-title":"Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision. 5654--5664","author":"Ko\u00e7yi\u011fit Mustafa Taha","year":"2023","unstructured":"Mustafa Taha Ko\u00e7yi\u011fit, Timothy M Hospedales, and Hakan Bilen. 2023. Accelerating self-supervised learning via efficient training strategies. In Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision. 5654--5664."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2013.77"},{"key":"e_1_3_2_1_22_1","unstructured":"Alex Krizhevsky. 2009. Learning Multiple Layers of Features from Tiny Images. https:\/\/api.semanticscholar.org\/CorpusID:18268744"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC.2019.8662302"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.3009373"},{"key":"e_1_3_2_1_25_1","volume-title":"MICRO-54: 54th Annual IEEE\/ACM International Symposium on Microarchitecture. 992--1004","author":"Li Shiyu","year":"2021","unstructured":"Shiyu Li, Edward Hanson, Xuehai Qian, Hai\" Helen\" Li, and Yiran Chen. 2021. ESCALATE: Boosting the efficiency of sparse CNN accelerator with kernel decomposition. In MICRO-54: 54th Annual IEEE\/ACM International Symposium on Microarchitecture. 992--1004."},{"key":"e_1_3_2_1_26_1","volume-title":"The Twelfth International Conference on Learning Representations.","author":"Li Sheng","year":"2023","unstructured":"Sheng Li, Chao Wu, Ao Li, Yanzhi Wang, Xulong Tang, and Geng Yuan. 2023. Waxing-and-Waning: a Generic Similarity-based Framework for Efficient Self-Supervised Learning. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_2_1_27_1","unstructured":"Subhransu Maji Esa Rahtu Juho Kannala Matthew Blaschko and Andrea Vedaldi. 2013. Fine-Grained Visual Classification of Aircraft. arXiv:1306.5151 [cs.CV]"},{"volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 6707--6717","author":"Misra Ishan","key":"e_1_3_2_1_28_1","unstructured":"Ishan Misra and Laurens van der Maaten. 2020. Self-supervised learning of pretext-invariant representations. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 6707--6717."},{"key":"e_1_3_2_1_29_1","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 2674--2683","author":"Reed Colorado J","year":"2021","unstructured":"Colorado J Reed, Sean Metzger, Aravind Srinivas, Trevor Darrell, and Kurt Keutzer. 2021. Selfaugment: Automatic augmentation policies for self-supervised learning. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 2674--2683."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2023.3273287"},{"key":"e_1_3_2_1_31_1","volume-title":"Belongie","author":"Wah Catherine","year":"2011","unstructured":"Catherine Wah, Steve Branson, Peter Welinder, Pietro Perona, and Serge J. Belongie. 2011. The Caltech-UCSD Birds-200-2011 Dataset. https:\/\/api.semanticscholar.org\/CorpusID:16119123"},{"key":"e_1_3_2_1_32_1","first-page":"2893","article-title":"A block PatchMatch-based energy-resource efficient stereo matching processor on FPGA","volume":"69","author":"Wang Hongyu","year":"2022","unstructured":"Hongyu Wang, Wei Zhou, Xiangyu Zhang, and Xin Lou. 2022. A block PatchMatch-based energy-resource efficient stereo matching processor on FPGA. IEEE Transactions on Circuits and Systems I: Regular Papers 69, 7 (2022), 2893--2905.","journal-title":"IEEE Transactions on Circuits and Systems I: Regular Papers"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2022.3140730"},{"key":"e_1_3_2_1_34_1","volume-title":"Wider or deeper: Revisiting the resnet model for visual recognition. Pattern recognition 90","author":"Wu Zifeng","year":"2019","unstructured":"Zifeng Wu, Chunhua Shen, and Anton Van Den Hengel. 2019. Wider or deeper: Revisiting the resnet model for visual recognition. Pattern recognition 90 (2019), 119--133."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSI.2021.3074300"},{"key":"e_1_3_2_1_36_1","first-page":"3753","article-title":"Seed the views: Hierarchical semantic alignment for contrastive representation learning","volume":"45","author":"Xu Haohang","year":"2022","unstructured":"Haohang Xu, Xiaopeng Zhang, Hao Li, Lingxi Xie, Wenrui Dai, Hongkai Xiong, and Qi Tian. 2022. Seed the views: Hierarchical semantic alignment for contrastive representation learning. IEEE Transactions on Pattern Analysis and Machine Intelligence 45, 3 (2022), 3753--3767.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3241539.3241563"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00029"},{"key":"e_1_3_2_1_39_1","first-page":"19061","article-title":"Layer freezing & data sieving: missing pieces of a generic framework for sparse training","volume":"35","author":"Yuan Geng","year":"2022","unstructured":"Geng Yuan, Yanyu Li, Sheng Li, Zhenglun Kong, Sergey Tulyakov, Xulong Tang, Yanzhi Wang, and Jian Ren. 2022. Layer freezing & data sieving: missing pieces of a generic framework for sparse training. Advances in Neural Information Processing Systems 35 (2022), 19061--19074.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_40_1","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 16580--16589","author":"Zhang Tong","year":"2022","unstructured":"Tong Zhang, Congpei Qiu, Wei Ke, Sabine S\u00fcsstrunk, and Mathieu Salzmann. 2022. Leverage your local and global representations: A new self-supervised learning strategy. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 16580--16589."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2022.3179016"},{"key":"e_1_3_2_1_42_1","first-page":"2543","article-title":"Ressl: Relational self-supervised learning with weak augmentation","volume":"34","author":"Zheng Mingkai","year":"2021","unstructured":"Mingkai Zheng, Shan You, Fei Wang, Chen Qian, Changshui Zhang, Xiaogang Wang, and Chang Xu. 2021. Ressl: Relational self-supervised learning with weak augmentation. Advances in Neural Information Processing Systems 34 (2021), 2543--2555.","journal-title":"Advances in Neural Information Processing Systems"}],"event":{"name":"ASPDAC '25: 30th Asia and South Pacific Design Automation Conference","sponsor":["SIGDA ACM Special Interest Group on Design Automation","IEICE","IPSJ","IEEE CAS","IEEE CEDA"],"location":"Tokyo Japan","acronym":"ASPDAC '25"},"container-title":["Proceedings of the 30th Asia and South Pacific Design Automation Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3658617.3697548","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3658617.3697548","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T23:44:18Z","timestamp":1750290258000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3658617.3697548"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,1,20]]},"references-count":42,"alternative-id":["10.1145\/3658617.3697548","10.1145\/3658617"],"URL":"https:\/\/doi.org\/10.1145\/3658617.3697548","relation":{},"subject":[],"published":{"date-parts":[[2025,1,20]]},"assertion":[{"value":"2025-03-04","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}