{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,5]],"date-time":"2026-03-05T16:12:23Z","timestamp":1772727143587,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,8,12]],"date-time":"2024-08-12T00:00:00Z","timestamp":1723420800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100010661","name":"European Union's Horizon 2020","doi-asserted-by":"publisher","award":["957197"],"award-info":[{"award-number":["957197"]}],"id":[{"id":"10.13039\/100010661","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Swedish Foundation for Strategic Research","award":["CHI19-0048"],"award-info":[{"award-number":["CHI19-0048"]}]},{"name":"European High Performance Computing Joint Undertaking","award":["101036168"],"award-info":[{"award-number":["101036168"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,8,12]]},"DOI":"10.1145\/3673038.3673115","type":"proceedings-article","created":{"date-parts":[[2024,8,8]],"date-time":"2024-08-08T18:29:01Z","timestamp":1723141741000},"page":"629-639","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Scratchpad Memory Management for Deep Learning Accelerators"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-4419-6491","authenticated-orcid":false,"given":"Stavroula","family":"Zouzoula","sequence":"first","affiliation":[{"name":"Chalmers University of Technology, Sweden"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9019-3605","authenticated-orcid":false,"given":"Mohammad Ali","family":"Maleki","sequence":"additional","affiliation":[{"name":"Chalmers University of Technology, Sweden"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0477-4540","authenticated-orcid":false,"given":"Muhammad Waqar","family":"Azhar","sequence":"additional","affiliation":[{"name":"ZeroPoint Technologies, Sweden and Chalmers University of Technology, Sweden"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2776-9253","authenticated-orcid":false,"given":"Pedro","family":"Trancoso","sequence":"additional","affiliation":[{"name":"Chalmers University of Technology, Sweden"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,8,12]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2014.2339736"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/774789.774805"},{"key":"e_1_3_2_1_3_1","volume-title":"Yolov4: Optimal speed and accuracy of object detection. arXiv preprint arXiv:2004.10934","author":"Bochkovskiy Alexey","year":"2020","unstructured":"Alexey Bochkovskiy, Chien-Yao Wang, and Hong-Yuan\u00a0Mark Liao. 2020. Yolov4: Optimal speed and accuracy of object detection. arXiv preprint arXiv:2004.10934 (2020)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/PACT52795.2021.00019"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/2654822.2541967"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00050"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001177"},{"key":"e_1_3_2_1_8_1","unstructured":"Intel Corporation. 2024. Quick overview of Intel\u2019s Neural Processing Unit (NPU). https:\/\/intel.github.io\/intel-npu-acceleration-library\/npu.html"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCAS45731.2020.9180403"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2750389"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2306.15552"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_13_1","volume-title":"Mobilenets: Efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861","author":"Howard G","year":"2017","unstructured":"Andrew\u00a0G Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, Tobias Weyand, Marco Andreetto, and Hartwig Adam. 2017. Mobilenets: Efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861 (2017)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.182"},{"key":"e_1_3_2_1_17_1","volume-title":"Why systolic architectures?Computer 15, 1","author":"Kung Hsiang-Tsung","year":"1982","unstructured":"Hsiang-Tsung Kung. 1982. Why systolic architectures?Computer 15, 1 (1982), 37\u201346."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2021.3082868"},{"key":"e_1_3_2_1_19_1","volume-title":"Automation & Test in Europe Conference & Exhibition (DATE). IEEE, 343\u2013348","author":"Li Jiajun","year":"2018","unstructured":"Jiajun Li, Guihai Yan, Wenyan Lu, Shuhao Jiang, Shijun Gong, Jingya Wu, and Xiaowei Li. 2018. SmartShuttle: Optimizing off-chip memory accesses for deep learning accelerators. In 2018 Design, Automation & Test in Europe Conference & Exhibition (DATE). IEEE, 343\u2013348."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.3390\/s22166097"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2016.2574353"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCD.2013.6657019"},{"key":"e_1_3_2_1_23_1","volume-title":"Embedded Machine Learning for Cyber-Physical, IoT, and Edge Computing: Hardware Architectures","author":"Vidya\u00a0Wicaksana Putra Rachmad","unstructured":"Rachmad Vidya\u00a0Wicaksana Putra, Muhammad\u00a0Abdullah Hanif, and Muhammad Shafique. 2023. Massively Parallel Neural Processing Array (MPNA): A CNN Accelerator for Embedded Systems. In Embedded Machine Learning for Cyber-Physical, IoT, and Edge Computing: Hardware Architectures. Springer, 3\u201324."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS48437.2020.00016"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00474"},{"key":"e_1_3_2_1_26_1","volume-title":"FPGA-based accelerators of deep learning networks for learning and classification: A review. ieee Access 7","author":"Shawahna Ahmad","year":"2018","unstructured":"Ahmad Shawahna, Sadiq\u00a0M Sait, and Aiman El-Maleh. 2018. FPGA-based accelerators of deep learning networks for learning and classification: A review. ieee Access 7 (2018), 7823\u20137859."},{"key":"e_1_3_2_1_27_1","volume-title":"Escher: A CNN accelerator with flexible buffering to minimize off-chip transfer. In 2017 IEEE 25Th annual international symposium on field-programmable custom computing machines (FCCM)","author":"Shen Yongming","year":"2017","unstructured":"Yongming Shen, Michael Ferdman, and Peter Milder. 2017. Escher: A CNN accelerator with flexible buffering to minimize off-chip transfer. In 2017 IEEE 25Th annual international symposium on field-programmable custom computing machines (FCCM). IEEE, 93\u2013100."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2018.8573527"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3532863"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00293"},{"key":"e_1_3_2_1_32_1","volume-title":"International conference on machine learning. PMLR, 6105\u20136114","author":"Tan Mingxing","year":"2019","unstructured":"Mingxing Tan and Quoc Le. 2019. Efficientnet: Rethinking model scaling for convolutional neural networks. In International conference on machine learning. PMLR, 6105\u20136114."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISVLSI49217.2020.00051"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3575798"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO53902.2022.9741281"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3460776"},{"key":"e_1_3_2_1_37_1","first-page":"2860","article-title":"Heterogeneous systolic array architecture for compact cnns hardware accelerators","volume":"33","author":"Xu Rui","year":"2021","unstructured":"Rui Xu, Sheng Ma, Yaohua Wang, Yang Guo, Dongsheng Li, and Yuran Qiao. 2021. Heterogeneous systolic array architecture for compact cnns hardware accelerators. IEEE Transactions on Parallel and Distributed Systems 33, 11 (2021), 2860\u20132871.","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"key":"e_1_3_2_1_38_1","volume-title":"Proceedings of the Great Lakes Symposium on VLSI","author":"Yang Jiaqi","year":"2022","unstructured":"Jiaqi Yang, Hao Zheng, and Ahmed Louri. 2022. Adapt-Flow: A Flexible DNN Accelerator Architecture for Heterogeneous Dataflow Implementation. In Proceedings of the Great Lakes Symposium on VLSI 2022. 287\u2013292."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.vlsi.2022.10.006"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSII.2022.3150030"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCAS45731.2020.9180844"},{"key":"e_1_3_2_1_42_1","volume-title":"RAINBOW: Multi-Dimensional Hardware-Software Co-Design for DL Accelerator On-Chip Memory. In 2023 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS). 352\u2013354","author":"Zouzoula Stavroula","year":"2023","unstructured":"Stavroula Zouzoula, Muhammad\u00a0Waqar Azhar, and Pedro Trancoso. 2023. RAINBOW: Multi-Dimensional Hardware-Software Co-Design for DL Accelerator On-Chip Memory. In 2023 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS). 352\u2013354."}],"event":{"name":"ICPP '24: the 53rd International Conference on Parallel Processing","location":"Gotland Sweden","acronym":"ICPP '24"},"container-title":["Proceedings of the 53rd International Conference on Parallel Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3673038.3673115","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3673038.3673115","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,23]],"date-time":"2025-09-23T17:32:55Z","timestamp":1758648775000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3673038.3673115"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,12]]},"references-count":42,"alternative-id":["10.1145\/3673038.3673115","10.1145\/3673038"],"URL":"https:\/\/doi.org\/10.1145\/3673038.3673115","relation":{},"subject":[],"published":{"date-parts":[[2024,8,12]]},"assertion":[{"value":"2024-08-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}