{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,6]],"date-time":"2026-06-06T05:13:44Z","timestamp":1780722824252,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":52,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,5,29]],"date-time":"2024-05-29T00:00:00Z","timestamp":1716940800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,5,29]]},"DOI":"10.1145\/3636534.3649391","type":"proceedings-article","created":{"date-parts":[[2024,5,29]],"date-time":"2024-05-29T13:32:55Z","timestamp":1716989575000},"page":"709-723","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":34,"title":["FlexNN: Efficient and Adaptive DNN Inference on Memory-Constrained Edge Devices"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-5341-2303","authenticated-orcid":false,"given":"Xiangyu","family":"Li","sequence":"first","affiliation":[{"name":"Institute for AI Industry Research (AIR), Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1591-2526","authenticated-orcid":false,"given":"Yuanchun","family":"Li","sequence":"additional","affiliation":[{"name":"Institute for AI Industry Research (AIR), Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0594-2745","authenticated-orcid":false,"given":"Yuanzhe","family":"Li","sequence":"additional","affiliation":[{"name":"Institute for AI Industry Research (AIR), Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9107-013X","authenticated-orcid":false,"given":"Ting","family":"Cao","sequence":"additional","affiliation":[{"name":"Microsoft Research, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7352-8955","authenticated-orcid":false,"given":"Yunxin","family":"Liu","sequence":"additional","affiliation":[{"name":"Institute for AI Industry Research (AIR), Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,5,29]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","unstructured":"Mart\u00edn Abadi Ashish Agarwal Paul Barham Eugene Brevdo Zhifeng Chen Craig Citro Greg S. Corrado Andy Davis Jeffrey Dean Matthieu Devin Sanjay Ghemawat Ian Goodfellow Andrew Harp Geoffrey Irving Michael Isard Rafal Jozefowicz Yangqing Jia Lukasz Kaiser Manjunath Kudlur Josh Levenberg Dan Man\u00e9 Mike Schuster Rajat Monga Sherry Moore Derek Murray Chris Olah Jonathon Shlens Benoit Steiner Ilya Sutskever Kunal Talwar Paul Tucker Vincent Vanhoucke Vijay Vasudevan Fernanda Vi\u00e9gas Oriol Vinyals Pete Warden Martin Wattenberg Martin Wicke Yuan Yu and Xiaoqiang Zheng. 2015. TensorFlow Large-scale machine learning on heterogeneous systems. 10.5281\/zenodo.4724125","DOI":"10.5281\/zenodo.4724125"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.comcom.2021.01.021"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1002\/ett.4150"},{"key":"e_1_3_2_1_4_1","volume-title":"Once-for-all: Train one network and specialize it for efficient deployment. arXiv preprint arXiv:1908.09791","author":"Cai Han","year":"2019","unstructured":"Han Cai, Chuang Gan, Tianzhe Wang, Zhekai Zhang, and Song Han. 2019. Once-for-all: Train one network and specialize it for efficient deployment. arXiv preprint arXiv:1908.09791 (2019)."},{"key":"e_1_3_2_1_5_1","volume-title":"Proxylessnas: Direct neural architecture search on target task and hardware. arXiv preprint arXiv:1812.00332","author":"Cai Han","year":"2018","unstructured":"Han Cai, Ligeng Zhu, and Song Han. 2018. Proxylessnas: Direct neural architecture search on target task and hardware. arXiv preprint arXiv:1812.00332 (2018)."},{"key":"e_1_3_2_1_6_1","unstructured":"developer.arm.com. 2023. NEON and Floating-Point architecture. https:\/\/developer.arm.com\/documentation\/den0024\/a\/AArch64-Floating-point-and-NEON\/NEON-and-Floating-Point-architecture accessed: 2023-08-25."},{"key":"e_1_3_2_1_7_1","unstructured":"Android Developers. 2023. Manage your app's memory. https:\/\/developer.android.com\/topic\/performance\/memory accessed: 2023-08-01."},{"key":"e_1_3_2_1_8_1","unstructured":"MACE Developers. 2023. XiaoMi\/mace: MACE is a deep learning inference framework optimized for mobile heterogeneous computing platforms. https:\/\/github.com\/XiaoMi\/mace accessed: 2023-08-01."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2015.2477042"},{"key":"e_1_3_2_1_10_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly et al. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3241539.3241559"},{"key":"e_1_3_2_1_12_1","volume-title":"Deep learning-based image recognition for autonomous driving. IATSS research 43, 4","author":"Fujiyoshi Hironobu","year":"2019","unstructured":"Hironobu Fujiyoshi, Tsubasa Hirakawa, and Takayoshi Yamashita. 2019. Deep learning-based image recognition for autonomous driving. IATSS research 43, 4 (2019), 244--252."},{"key":"e_1_3_2_1_13_1","unstructured":"GadgetVersus. 2023. Apple iPhone 4 vs Apple iPhone 14 Benchmarks Specs Performance Comparison and Differences. https:\/\/gadgetversus.com\/smartphone\/apple-iphone-4-vs-apple-iphone-14\/ accessed: 2023-08-01."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1002\/rob.21918"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.array.2021.100057"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447993.3483249"},{"key":"e_1_3_2_1_17_1","volume-title":"Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding. arXiv preprint arXiv:1510.00149","author":"Han Song","year":"2015","unstructured":"Song Han, Huizi Mao, and William J Dally. 2015. Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding. arXiv preprint arXiv:1510.00149 (2015)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_48"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378530"},{"key":"e_1_3_2_1_21_1","volume-title":"SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and < 0.5 MB model size. arXiv preprint arXiv:1602.07360","author":"Iandola Forrest N","year":"2016","unstructured":"Forrest N Iandola, Song Han, Matthew W Moskewicz, Khalid Ashraf, William J Dally, and Kurt Keutzer. 2016. SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and < 0.5 MB model size. arXiv preprint arXiv:1602.07360 (2016)."},{"key":"e_1_3_2_1_22_1","volume-title":"MNN: A Universal and Efficient Inference Engine. In MLSys.","author":"Jiang Xiaotang","year":"2020","unstructured":"Xiaotang Jiang, Huan Wang, Yiliu Chen, Ziqi Wu, Lichuan Wang, Bin Zou, Yafeng Yang, Zongyang Cui, Yu Cai, Tianhang Yu, Chengfei Lv, and Zhihua Wu. 2020. MNN: A Universal and Efficient Inference Engine. In MLSys."},{"key":"e_1_3_2_1_23_1","unstructured":"Poole John. 2016. Geekbench 4. https:\/\/www.geekbench.com\/blog\/2016\/08\/geekbench-4\/ accessed: 2023-08-06."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2019.2962338"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10586-017-1117-8"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3300061.3345447"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00339"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3567955.3567961"},{"key":"e_1_3_2_1_29_1","unstructured":"Hui Ni and The ncnn contributors. 2017. ncnn. https:\/\/github.com\/Tencent\/ncnn"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.5244\/C.29.41"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378505"},{"key":"e_1_3_2_1_32_1","unstructured":"Alec Radford Jeffrey Wu Rewon Child David Luan Dario Amodei Ilya Sutskever et al. 2019. Language models are unsupervised multitask learners. OpenAI blog 1 8 (2019) 9."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00045"},{"key":"e_1_3_2_1_34_1","first-page":"1","article-title":"A comprehensive survey of neural architecture search: Challenges and solutions","volume":"54","author":"Ren Pengzhen","year":"2021","unstructured":"Pengzhen Ren, Yun Xiao, Xiaojun Chang, Po-Yao Huang, Zhihui Li, Xiaojiang Chen, and Xin Wang. 2021. A comprehensive survey of neural architecture search: Challenges and solutions. ACM Computing Surveys (CSUR) 54, 4 (2021), 1--34.","journal-title":"ACM Computing Surveys (CSUR)"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783721"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00474"},{"key":"e_1_3_2_1_37_1","unstructured":"Ying Sheng Lianmin Zheng Binhang Yuan Zhuohan Li Max Ryabinin Daniel Y Fu Zhiqiang Xie Beidi Chen Clark Barrett Joseph E Gonzalez et al. 2023. High-throughput generative inference of large language models with a single gpu. arXiv preprint arXiv:2303.06865 (2023)."},{"key":"e_1_3_2_1_38_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"e_1_3_2_1_39_1","volume-title":"Deepid3: Face recognition with very deep neural networks. arXiv preprint arXiv:1502.00873","author":"Sun Yi","year":"2015","unstructured":"Yi Sun, Ding Liang, Xiaogang Wang, and Xiaoou Tang. 2015. Deepid3: Face recognition with very deep neural networks. arXiv preprint arXiv:1502.00873 (2015)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00293"},{"key":"e_1_3_2_1_41_1","volume-title":"International conference on machine learning. PMLR, 6105--6114","author":"Tan Mingxing","year":"2019","unstructured":"Mingxing Tan and Quoc Le. 2019. Efficientnet: Rethinking model scaling for convolutional neural networks. In International conference on machine learning. PMLR, 6105--6114."},{"key":"e_1_3_2_1_42_1","volume-title":"Achieving Zero-COGS with Microsoft Editor","author":"Tao Ge","year":"2023","unstructured":"Ge Tao, Ting Cao, Si-Qing Chen, and Qiong(Emma) Ning. 2023. Achieving Zero-COGS with Microsoft Editor Neural Grammar Checker. https:\/\/www.microsoft.com\/en-us\/research\/blog\/achieving-zero-cogs-with-microsoft-editor-neural-grammar-checker\/, accessed: 2023-08-11."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2020.10.081"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3498361.3538928"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"crossref","unstructured":"Hao Wen Yuanchun Li Zunshuai Zhang Shiqi Jiang Xiaozhou Ye Ye Ouyang Ya-Qin Zhang and Yunxin Liu. 2023. AdaptiveNet: Post-deployment Neural Architecture Adaptation for Diverse Edge Environments. (2023).","DOI":"10.1145\/3570361.3592529"},{"key":"e_1_3_2_1_46_1","unstructured":"Tsu William. 2020. Introducing NVIDIA HGX A100: The Most Powerful Accelerated Server Platform for AI and High Performance Computing. https:\/\/developer.nvidia.com\/blog\/introducing-hgx-a100-most-powerful-accelerated-server-platform-for-ai-hpc\/ accessed: 2023-08-25."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01099"},{"key":"e_1_3_2_1_48_1","volume-title":"Driver activity recognition for intelligent vehicles: A deep learning approach","author":"Xing Yang","year":"2019","unstructured":"Yang Xing, Chen Lv, Huaji Wang, Dongpu Cao, Efstathios Velenis, and Fei-Yue Wang. 2019. Driver activity recognition for intelligent vehicles: A deep learning approach. IEEE transactions on Vehicular Technology 68, 6 (2019), 5379--5390."},{"key":"e_1_3_2_1_49_1","volume-title":"Yao Shu, Bingsheng He, and Wei Wang.","author":"Zhang Junzhe","year":"2019","unstructured":"Junzhe Zhang, Sai Ho Yeung, Yao Shu, Bingsheng He, and Wei Wang. 2019. Efficient memory management for gpu-based deep learning systems. arXiv preprint arXiv:1903.06631 (2019)."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW50498.2020.00354"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1080\/23249935.2019.1637966"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00716"}],"event":{"name":"ACM MobiCom '24: 30th Annual International Conference on Mobile Computing and Networking","location":"Washington D.C. DC USA","acronym":"ACM MobiCom '24","sponsor":["SIGMOBILE ACM Special Interest Group on Mobility of Systems, Users, Data and Computing"]},"container-title":["Proceedings of the 30th Annual International Conference on Mobile Computing and Networking"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3636534.3649391","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3636534.3649391","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T22:54:12Z","timestamp":1750287252000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3636534.3649391"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,29]]},"references-count":52,"alternative-id":["10.1145\/3636534.3649391","10.1145\/3636534"],"URL":"https:\/\/doi.org\/10.1145\/3636534.3649391","relation":{},"subject":[],"published":{"date-parts":[[2024,5,29]]},"assertion":[{"value":"2024-05-29","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}