{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T17:04:15Z","timestamp":1774631055422,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":21,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,1,31]],"date-time":"2024-01-31T00:00:00Z","timestamp":1706659200000},"content-version":"vor","delay-in-days":380,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000185","name":"Defense Advanced Research Projects Agency","doi-asserted-by":"publisher","award":["FA8650-20-2-7009"],"award-info":[{"award-number":["FA8650-20-2-7009"]}],"id":[{"id":"10.13039\/100000185","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Internship at Intel Strategic CAD Labs"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,1,16]]},"DOI":"10.1145\/3566097.3567863","type":"proceedings-article","created":{"date-parts":[[2023,1,31]],"date-time":"2023-01-31T18:40:49Z","timestamp":1675190449000},"page":"475-482","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["Reusing GEMM Hardware for Efficient Execution of Depthwise Separable Convolution on ASIC-Based DNN Accelerators"],"prefix":"10.1145","author":[{"given":"Susmita Dey","family":"Manasi","sequence":"first","affiliation":[{"name":"University of Minnesota Twin Cities"}]},{"given":"Suvadeep","family":"Banerjee","sequence":"additional","affiliation":[{"name":"Intel Labs"}]},{"given":"Abhijit","family":"Davare","sequence":"additional","affiliation":[{"name":"Intel Labs"}]},{"given":"Anton A.","family":"Sorokin","sequence":"additional","affiliation":[{"name":"Intel Labs"}]},{"given":"Steven M.","family":"Burns","sequence":"additional","affiliation":[{"name":"Intel Labs"}]},{"given":"Desmond A.","family":"Kirkpatrick","sequence":"additional","affiliation":[{"name":"Intel Labs"}]},{"given":"Sachin S.","family":"Sapatnekar","sequence":"additional","affiliation":[{"name":"University of Minnesota Twin Cities"}]}],"member":"320","published-online":{"date-parts":[[2023,1,31]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications,\" arXiv:1704.04861","author":"Howard A. G.","year":"2017","unstructured":"A. G. Howard, et al., \"MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications,\" arXiv:1704.04861, 2017."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.195"},{"key":"e_1_3_2_1_3_1","first-page":"6105","volume-title":"Int. Conf. on Machine Learning","volume":"97","author":"Tan M.","year":"2019","unstructured":"M. Tan and Q. Le, \"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks,\" in Proc. Int. Conf. on Machine Learning, vol. 97, pp. 6105--6114, June 2019."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00293"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00716"},{"key":"e_1_3_2_1_6_1","first-page":"1","volume-title":"ISCA","author":"Jouppi N. P.","year":"2017","unstructured":"N. P. Jouppi et al., \"In-datacenter Performance Analysis of a Tensor Processing Unit,\" in Proc. ISCA, pp. 1--12, Jun. 2017."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2019.2928962"},{"key":"e_1_3_2_1_8_1","volume-title":"ICCAD","author":"Esmaeilzadeh H.","year":"2021","unstructured":"H. Esmaeilzadeh, et al., \"VeriGOOD-ML: An Open-Source Flow for Automated ML Hardware Synthesis,\" in Proc. ICCAD, 2021."},{"key":"e_1_3_2_1_9_1","volume-title":"Nov.","author":"Genc H.","year":"2019","unstructured":"H. Genc, et al., \"Gemmini: An agile systolic array generator enabling systematic evaluations of deep-learning architectures,\" arXiv:1911.09925, Nov. 2019."},{"key":"e_1_3_2_1_10_1","volume-title":"May","author":"Zhang D.","year":"2021","unstructured":"D. Zhang, et al., \"A full-stack accelerator search technique for vision applications,\" arXiv:2105.12842, May 2021."},{"key":"e_1_3_2_1_11_1","volume-title":"Nov.","author":"Banerjee S.","year":"2021","unstructured":"S. Banerjee, et al., \"A Highly Configurable Hardware\/Software Stack for DNN Inference Acceleration,\" arXiv preprint arXiv:2111.15024, Nov. 2021."},{"key":"e_1_3_2_1_12_1","volume-title":"An FPGA-Based CNN Accelerator Integrating Depthwise Separable Convolution,\" Electronics","author":"Liu B.","year":"2019","unstructured":"B. Liu, et al., \"An FPGA-Based CNN Accelerator Integrating Depthwise Separable Convolution,\" Electronics, vol. 8, Mar. 2019."},{"key":"e_1_3_2_1_13_1","first-page":"247","volume-title":"GLSVLSI","author":"Baharani M.","year":"2021","unstructured":"M. Baharani, et al., \"DeepDive: An Integrative Algorithm\/Architecture Co-Design for Deep Separable Convolutional Neural Networks,\" in Proc. GLSVLSI, pp. 247--252, June 2021."},{"key":"e_1_3_2_1_14_1","first-page":"136","volume-title":"Proc. FPL","author":"Wu D.","year":"2019","unstructured":"D. Wu, et al., \"A High-Performance CNN Processor Based on FPGA for MobileNets,\" in Proc. FPL, pp. 136--143, 2019."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSII.2018.2865896"},{"key":"e_1_3_2_1_16_1","first-page":"103","volume-title":"PACT","author":"Vedula N.","year":"2021","unstructured":"N. Vedula, et al., \"X-Layer: Building Composable Pipelined Dataflows for Low-Rank Convolutions,\" in Proc. PACT, pp. 103--115, 2021."},{"key":"e_1_3_2_1_17_1","first-page":"292","article-title":"Eyeriss v2: A Flexible Accelerator for Emerging Deep Neural Networks on Mobile Devices","volume":"9","author":"Chen Y.-H.","year":"2019","unstructured":"Y.-H. Chen, et al., \"Eyeriss v2: A Flexible Accelerator for Emerging Deep Neural Networks on Mobile Devices,\" IEEE JETCAS, vol. 9, pp. 292--308, Apr. 2019.","journal-title":"IEEE JETCAS"},{"key":"e_1_3_2_1_18_1","first-page":"578","volume-title":"OSDI","author":"Chen T.","year":"2018","unstructured":"T. Chen, et al., \"TVM: An Automated End-to-End Optimizing Compiler for Deep Learning,\" in Proc. OSDI, pp. 578--594, Oct. 2018."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3360307"},{"key":"e_1_3_2_1_20_1","volume-title":"A Comparative Study between RTL and HLS for Image Processing Applications with FPGAs","author":"Gurel M.","year":"2016","unstructured":"M. Gurel, A Comparative Study between RTL and HLS for Image Processing Applications with FPGAs. University of California, San Diego, 2016."},{"key":"e_1_3_2_1_21_1","unstructured":"\"VTA Hardware Design Stack.\" https:\/\/github.com\/pasqoc\/incubator-tvm-vta."}],"event":{"name":"ASPDAC '23: 28th Asia and South Pacific Design Automation Conference","location":"Tokyo Japan","acronym":"ASPDAC '23","sponsor":["SIGDA ACM Special Interest Group on Design Automation","IEEE CEDA","IEICE","IEEE CAS","IPSJ"]},"container-title":["Proceedings of the 28th Asia and South Pacific Design Automation Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3566097.3567863","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3566097.3567863","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3566097.3567863","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,7]],"date-time":"2026-01-07T17:33:31Z","timestamp":1767807211000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3566097.3567863"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,1,16]]},"references-count":21,"alternative-id":["10.1145\/3566097.3567863","10.1145\/3566097"],"URL":"https:\/\/doi.org\/10.1145\/3566097.3567863","relation":{},"subject":[],"published":{"date-parts":[[2023,1,16]]},"assertion":[{"value":"2023-01-31","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}