{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,21]],"date-time":"2025-11-21T13:54:43Z","timestamp":1763733283062,"version":"3.45.0"},"publisher-location":"New York, NY, USA","reference-count":56,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,3]]},"DOI":"10.1145\/3680207.3765247","type":"proceedings-article","created":{"date-parts":[[2025,11,21]],"date-time":"2025-11-21T13:19:18Z","timestamp":1763731158000},"page":"802-816","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["AdaptQNet: Optimizing Quantized DNN on Microcontrollers via Adaptive Heterogeneous Processing Unit Utilization"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-5477-1059","authenticated-orcid":false,"given":"Yansong","family":"Sun","sequence":"first","affiliation":[{"name":"Hong Kong University of Science and Technology (Guangzhou), Guangzhou, Guangdong, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-6461-3866","authenticated-orcid":false,"given":"Jialuo","family":"He","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology (Guangzhou), Guangzhou, Guangdong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9021-9916","authenticated-orcid":false,"given":"Dirk","family":"Kutscher","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology (Guangzhou), Guangzhou, Guangdong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0313-4421","authenticated-orcid":false,"given":"Huangxun","family":"Chen","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology (Guangzhou), Guangzhou, Guangdong, China"}]}],"member":"320","published-online":{"date-parts":[[2025,11,21]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSII.2020.2983648"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3649329.3655922"},{"key":"e_1_3_2_1_3_1","volume-title":"Hawq-v2: Hessian aware trace-weighted quantization of neural networks. Advances in neural information processing systems 33","author":"Dong Zhen","year":"2020","unstructured":"Zhen Dong, Zhewei Yao, Daiyaan Arfeen, Amir Gholami, Michael W Mahoney, and Kurt Keutzer. 2020. Hawq-v2: Hessian aware trace-weighted quantization of neural networks. Advances in neural information processing systems 33 (2020), 18518\u201318529."},{"key":"e_1_3_2_1_4_1","volume-title":"FLIQS: One-Shot Mixed-Precision Floating-Point and Integer Quantization Search. In International Conference on Automated Machine Learning. PMLR, 6\u20131.","author":"Dotzel Jordan","year":"2024","unstructured":"Jordan Dotzel, Gang Wu, Andrew Li, Muhammad Umar, Yun Ni, Mohamed S Abdelfattah, Zhiru Zhang, Liqun Cheng, Martin G Dixon, Norman P Jouppi, et al. 2024. FLIQS: One-Shot Mixed-Precision Floating-Point and Integer Quantization Search. In International Conference on Automated Machine Learning. PMLR, 6\u20131."},{"key":"e_1_3_2_1_5_1","first-page":"1","article-title":"Neural architecture search: A survey","volume":"20","author":"Elsken Thomas","year":"2019","unstructured":"Thomas Elsken, Jan Hendrik Metzen, and Frank Hutter. 2019. Neural architecture search: A survey. Journal of Machine Learning Research 20, 55 (2019), 1\u201321.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_6_1","volume-title":"Low-Power Computer Vision","author":"Gholami Amir","unstructured":"Amir Gholami, Sehoon Kim, Zhen Dong, Zhewei Yao, Michael W Mahoney, and Kurt Keutzer. 2022. A survey of quantization methods for efficient neural network inference. In Low-Power Computer Vision. Chapman and Hall\/CRC, 291\u2013326."},{"key":"e_1_3_2_1_7_1","volume-title":"A HW\/SW Co-optimized Mixed-precision Neural Network Design Framework for MCUs. arXiv preprint arXiv:2407.18267","author":"Gong Junfeng","year":"2024","unstructured":"Junfeng Gong, Cheng Liu, Long Cheng, Huawei Li, and Xiaowei Li. 2024. MCU-MixQ: A HW\/SW Co-optimized Mixed-precision Neural Network Design Framework for MCUs. arXiv preprint arXiv:2407.18267 (2024)."},{"key":"e_1_3_2_1_8_1","volume-title":"Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding. arXiv preprint arXiv:1510.00149","author":"Han Song","year":"2015","unstructured":"Song Han, Huizi Mao, and William J Dally. 2015. Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding. arXiv preprint arXiv:1510.00149 (2015)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_10_1","volume-title":"Structured pruning for deep convolutional neural networks: A survey","author":"He Yang","year":"2023","unstructured":"Yang He and Lingao Xiao. 2023. Structured pruning for deep convolutional neural networks: A survey. IEEE transactions on pattern analysis and machine intelligence (2023)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00140"},{"key":"e_1_3_2_1_12_1","volume-title":"Mobilenets: Efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861","author":"Howard Andrew G","year":"2017","unstructured":"Andrew G Howard. 2017. Mobilenets: Efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861 (2017)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3636534.3690698"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3698388.3699628"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00286"},{"key":"e_1_3_2_1_16_1","volume-title":"Categorical Reparametrization with Gumble-Softmax. In International Conference on Learning Representations (ICLR","author":"Jang Eric","year":"2017","unstructured":"Eric Jang, Shixiang Gu, and Ben Poole. 2017. Categorical Reparametrization with Gumble-Softmax. In International Conference on Learning Representations (ICLR 2017). OpenReview. net."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3498361.3538948"},{"key":"e_1_3_2_1_18_1","volume-title":"Proceedings of the 20th Annual International Conference on Mobile Systems, Applications and Services. 209\u2013221","author":"Jia Fucheng","year":"2022","unstructured":"Fucheng Jia, Deyu Zhang, Ting Cao, Shiqi Jiang, Yunxin Liu, Ju Ren, and Yaoxue Zhang. 2022. CoDL: efficient CPU-GPU co-execution for deep learning inference on mobile devices. In Proceedings of the 20th Annual International Conference on Mobile Systems, Applications and Services. 209\u2013221."},{"key":"e_1_3_2_1_19_1","volume-title":"2024 IEEE International Conference on Pervasive Computing and Communications (PerCom). IEEE, 1\u201310","author":"Jia Hong","year":"2024","unstructured":"Hong Jia, Young D Kwon, Dong Mat, Nhat Pham, Lorena Qendro, Tam Vu, and Cecilia Mascolo. 2024. UR2M: Uncertainty and resource-aware event detection on microcontrollers. In 2024 IEEE International Conference on Pervasive Computing and Communications (PerCom). IEEE, 1\u201310."},{"key":"e_1_3_2_1_20_1","volume-title":"The Thirty-eighth Annual Conference on Neural Information Processing Systems.","author":"Jia Hong","year":"2024","unstructured":"Hong Jia, Young D Kwon, Alessio Orsino, Ting Dang, Domenico Talia, and Cecilia Mascolo. 2024. TinyTTA: Efficient Test-time Adaptation via Early-exit Ensembles on Edge Devices. In The Thirty-eighth Annual Conference on Neural Information Processing Systems."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581791.3596853"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3649329.3655681"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3302424.3303950"},{"key":"e_1_3_2_1_24_1","unstructured":"Alex Krizhevsky. 2009. Learning Multiple Layers of Features from Tiny Images. https:\/\/api.semanticscholar.org\/CorpusID:18268744"},{"key":"e_1_3_2_1_25_1","volume-title":"2024 International Joint Conference on Neural Networks (IJCNN). IEEE, 1\u201310","author":"Li Min","year":"2024","unstructured":"Min Li, Zihao Huang, Lin Chen, Junxing Ren, Miao Jiang, Fengfa Li, Jitao Fu, and Chenghua Gao. 2024. Contemporary advances in neural network quantization: A survey. In 2024 International Joint Conference on Neural Networks (IJCNN). IEEE, 1\u201310."},{"key":"e_1_3_2_1_26_1","volume-title":"Neural networks on microcontrollers: saving memory at inference via operator reordering. arXiv preprint arXiv:1910.05110","author":"Liberis Edgar","year":"2019","unstructured":"Edgar Liberis and Nicholas D Lane. 2019. Neural networks on microcontrollers: saving memory at inference via operator reordering. arXiv preprint arXiv:1910.05110 (2019)."},{"key":"e_1_3_2_1_27_1","volume-title":"Mcunet: Tiny deep learning on iot devices. Advances in Neural Information Processing Systems 33","author":"Lin Ji","year":"2020","unstructured":"Ji Lin, Wei-Ming Chen, Yujun Lin, Chuang Gan, and Song Han. 2020. Mcunet: Tiny deep learning on iot devices. Advances in Neural Information Processing Systems 33 (2020)."},{"key":"e_1_3_2_1_28_1","volume-title":"The Concrete Distribution: A Continuous Relaxation of Discrete Random Variables. In International Conference on Learning Representations.","author":"Maddison Chris J","year":"2022","unstructured":"Chris J Maddison, Andriy Mnih, and Yee Whye Teh. 2022. The Concrete Distribution: A Continuous Relaxation of Discrete Random Variables. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_29_1","volume-title":"Yelysei Bondarenko, Mart Van Baalen, and Tijmen Blankevoort.","author":"Nagel Markus","year":"2021","unstructured":"Markus Nagel, Marios Fournarakis, Rana Ali Amjad, Yelysei Bondarenko, Mart Van Baalen, and Tijmen Blankevoort. 2021. A white paper on neural network quantization. arXiv preprint arXiv:2106.08295 (2021)."},{"key":"e_1_3_2_1_30_1","unstructured":"OpenAMP Project. 2023. OpenAMP Library - Open Asymmetric Multi Processing Framework. Online Documentation. https:\/\/openamp.readthedocs.io\/en\/latest\/doxygen\/openamp\/index.html Accessed: 2024-01-10."},{"key":"e_1_3_2_1_31_1","volume-title":"International conference on machine learning. PMLR, 4095\u20134104","author":"Pham Hieu","year":"2018","unstructured":"Hieu Pham, Melody Guan, Barret Zoph, Quoc Le, and Jeff Dean. 2018. Efficient neural architecture search via parameters sharing. In International conference on machine learning. PMLR, 4095\u20134104."},{"key":"e_1_3_2_1_32_1","volume-title":"Mixed-precision neural networks: A survey. arXiv preprint arXiv:2208.06064","author":"Rakka Mariam","year":"2022","unstructured":"Mariam Rakka, Mohammed E Fouda, Pramod Khargonekar, and Fadi Kurdahi. 2022. Mixed-precision neural networks: A survey. arXiv preprint arXiv:2208.06064 (2022)."},{"key":"e_1_3_2_1_33_1","volume-title":"A Review of State-of-the-Art Mixed-Precision Neural Network Frameworks","author":"Rakka Mariam","year":"2024","unstructured":"Mariam Rakka, Mohammed E Fouda, Pramod Khargonekar, and Fadi Kurdahi. 2024. A Review of State-of-the-Art Mixed-Precision Neural Network Frameworks. IEEE Transactions on Pattern Analysis and Machine Intelligence (2024)."},{"key":"e_1_3_2_1_34_1","first-page":"1","article-title":"A comprehensive survey of neural architecture search: Challenges and solutions","volume":"54","author":"Ren Pengzhen","year":"2021","unstructured":"Pengzhen Ren, Yun Xiao, Xiaojun Chang, Po-Yao Huang, Zhihui Li, Xiaojiang Chen, and Xin Wang. 2021. A comprehensive survey of neural architecture search: Challenges and solutions. ACM Computing Surveys (CSUR) 54, 4 (2021), 1\u201334.","journal-title":"ACM Computing Surveys (CSUR)"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3623402"},{"key":"e_1_3_2_1_36_1","first-page":"326","article-title":"Memory-driven mixed low precision quantization for enabling deep network inference on microcontrollers","volume":"2","author":"Rusci Manuele","year":"2020","unstructured":"Manuele Rusci, Alessandro Capotondi, and Luca Benini. 2020. Memory-driven mixed low precision quantization for enabling deep network inference on microcontrollers. Proceedings of Machine Learning and Systems 2 (2020), 326\u2013335.","journal-title":"Proceedings of Machine Learning and Systems"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00474"},{"key":"e_1_3_2_1_38_1","volume-title":"Artificial Intelligence of Things: A Survey. ACM Transactions on Sensor Networks","author":"Siam Shakhrul Iman","year":"2024","unstructured":"Shakhrul Iman Siam, Hyunho Ahn, Li Liu, Samiul Alam, Hui Shen, Zhichao Cao, Ness Shroff, Bhaskar Krishnamachari, Mani Srivastava, and Mi Zhang. 2024. Artificial Intelligence of Things: A Survey. ACM Transactions on Sensor Networks (2024)."},{"key":"e_1_3_2_1_39_1","unstructured":"Statista. [n. d.]. Leading microcontroller unit (MCU) manufacturers worldwide. https:\/\/www.statista.com\/statistics\/1327509\/top-mcu-suppliers-worldwide\/. https:\/\/www.statista.com\/statistics\/1327509\/top-mcu-suppliers-worldwide\/"},{"key":"e_1_3_2_1_40_1","unstructured":"STMicroelectronics. [n. d.]. X-CUBE-AI: AI expansion pack for STM32CubeMX. https:\/\/www.st.com\/en\/embedded-software\/x-cube-ai.html. https:\/\/www.st.com\/en\/embedded-software\/x-cube-ai.html"},{"key":"e_1_3_2_1_41_1","unstructured":"STMicroelectronics. 2023. STM32CubeMX - STM32Cube initialization code generator. Software Tool. https:\/\/www.st.com\/en\/development-tools\/stm32cubemx.html Accessed: 2024-01-10."},{"key":"e_1_3_2_1_42_1","unstructured":"STMicroelectronics. 2023. STM32H7 Introduction: Dual-Core Architecture. Application Note DM00733995. https:\/\/www.st.com\/resource\/en\/application_note\/dm00733995.pdf Accessed: 2024-01-10."},{"key":"e_1_3_2_1_43_1","unstructured":"STMicroelectronics. 2024. ARM Cortex-M7 Microcontroller. https:\/\/www.st.com\/content\/st_com\/en\/arm-32-bit-microcontrollers\/arm-cortex-m7.html. Accessed: 2024-12-10."},{"key":"e_1_3_2_1_44_1","volume-title":"International conference on machine learning. PMLR, 6105\u20136114","author":"Tan Mingxing","year":"2019","unstructured":"Mingxing Tan and Quoc Le. 2019. Efficientnet: Rethinking model scaling for convolutional neural networks. In International conference on machine learning. PMLR, 6105\u20136114."},{"key":"e_1_3_2_1_45_1","volume-title":"Automation & Test in Europe Conference & Exhibition (DATE). IEEE, 1\u20136.","author":"Tao Wei","year":"2024","unstructured":"Wei Tao, Shenglin He, Kai Lu, Xiaoyang Qu, Guokuan Li, Jiguang Wan, Jianzong Wang, and Jing Xiao. 2024. Value-Driven Mixed-Precision Quantization for Patch-Based Inference on Microcontrollers. In 2024 Design, Automation & Test in Europe Conference & Exhibition (DATE). IEEE, 1\u20136."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.5555\/3157382.3157504"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00881"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581791.3596870"},{"key":"e_1_3_2_1_49_1","volume-title":"Mixed precision quantization of convnets via differentiable neural architecture search. arXiv preprint arXiv:1812.00090","author":"Wu Bichen","year":"2018","unstructured":"Bichen Wu, Yanghan Wang, Peizhao Zhang, Yuandong Tian, Peter Vajda, and Kurt Keutzer. 2018. Mixed precision quantization of convnets via differentiable neural architecture search. arXiv preprint arXiv:1812.00090 (2018)."},{"key":"e_1_3_2_1_50_1","volume-title":"Low-Latency Deep Learning Inference Schedule on Multi-Core MCU. In 2024 International Joint Conference on Neural Networks (IJCNN). IEEE, 1\u20138.","author":"Xu Chaonong","year":"2024","unstructured":"Chaonong Xu, Min Liu, Chao Li, and Weiming Kong. 2024. Low-Latency Deep Learning Inference Schedule on Multi-Core MCU. In 2024 International Joint Conference on Neural Networks (IJCNN). IEEE, 1\u20138."},{"key":"e_1_3_2_1_51_1","volume-title":"Proceedings of the 28th Annual International Conference on Mobile Computing And Networking. 214\u2013227","author":"Xu Daliang","year":"2022","unstructured":"Daliang Xu, Mengwei Xu, Qipeng Wang, Shangguang Wang, Yun Ma, Kang Huang, Gang Huang, Xin Jin, and Xuanzhe Liu. 2022. Mandheling: Mixed-precision on-device dnn training with dsp offloading. In Proceedings of the 28th Annual International Conference on Mobile Computing And Networking. 214\u2013227."},{"key":"e_1_3_2_1_52_1","volume-title":"2020 39th Chinese Control Conference (CCC). IEEE, 7458\u20137463","author":"Xu Sheng","year":"2020","unstructured":"Sheng Xu, Anran Huang, Lei Chen, and Baochang Zhang. 2020. Convolutional neural network pruning: A survey. In 2020 39th Chinese Control Conference (CCC). IEEE, 7458\u20137463."},{"key":"e_1_3_2_1_53_1","volume-title":"Rapq: Rescuing accuracy for power-of-two low-bit post-training quantization. arXiv preprint arXiv:2204.12322","author":"Yao Hongyi","year":"2022","unstructured":"Hongyi Yao, Pu Li, Jian Cao, Xiangcheng Liu, Chenying Xie, and Bingzhang Wang. 2022. Rapq: Rescuing accuracy for power-of-two low-bit post-training quantization. arXiv preprint arXiv:2204.12322 (2022)."},{"key":"e_1_3_2_1_54_1","volume-title":"International Conference on Machine Learning. PMLR, 11875\u201311886","author":"Yao Zhewei","year":"2021","unstructured":"Zhewei Yao, Zhen Dong, Zhangcheng Zheng, Amir Gholami, Jiali Yu, Eric Tan, Leyuan Wang, Qijing Huang, Yida Wang, Michael Mahoney, et al. 2021. Hawq-v3: Dyadic neural network quantization. In International Conference on Machine Learning. PMLR, 11875\u201311886."},{"key":"e_1_3_2_1_55_1","volume-title":"European Conference on Computer Vision. Springer, 1\u201316","author":"Yu Haibao","year":"2020","unstructured":"Haibao Yu, Qi Han, Jianbo Li, Jianping Shi, Guangliang Cheng, and Bin Fan. 2020. Search what you want: Barrier panelty nas for mixed precision quantization. In European Conference on Computer Vision. Springer, 1\u201316."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458864.3467882"}],"event":{"name":"ACM MOBICOM '25: 31st Annual International Conference on Mobile Computing and Networking","location":"Kerry Hotel, Hong Kong Hong Kong China","acronym":"ACM MOBICOM '25","sponsor":["SIGMOBILE ACM Special Interest Group on Mobility of Systems, Users, Data and Computing"]},"container-title":["Proceedings of the 31st Annual International Conference on Mobile Computing and Networking"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3680207.3765247","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,21]],"date-time":"2025-11-21T13:28:52Z","timestamp":1763731732000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3680207.3765247"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,3]]},"references-count":56,"alternative-id":["10.1145\/3680207.3765247","10.1145\/3680207"],"URL":"https:\/\/doi.org\/10.1145\/3680207.3765247","relation":{},"subject":[],"published":{"date-parts":[[2025,11,3]]},"assertion":[{"value":"2025-11-21","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}