{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,21]],"date-time":"2025-11-21T18:25:30Z","timestamp":1763749530584,"version":"build-2065373602"},"publisher-location":"New York, NY, USA","reference-count":67,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100003725","name":"National Research Foundation of Korea","doi-asserted-by":"publisher","award":["RS-2024-00344323"],"award-info":[{"award-number":["RS-2024-00344323"]}],"id":[{"id":"10.13039\/501100003725","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,23]]},"DOI":"10.1145\/3711875.3729161","type":"proceedings-article","created":{"date-parts":[[2025,9,25]],"date-time":"2025-09-25T15:45:39Z","timestamp":1758815139000},"page":"249-262","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["ARIA: Optimizing Vision Foundation Model Inference on Heterogeneous Mobile Processors for Augmented Reality"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-7279-6397","authenticated-orcid":false,"given":"Chanyoung","family":"Jung","sequence":"first","affiliation":[{"name":"Yonsei University, Seoul, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9035-2602","authenticated-orcid":false,"given":"Jeho","family":"Lee","sequence":"additional","affiliation":[{"name":"Yonsei University, Seoul, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-2584-8741","authenticated-orcid":false,"given":"Gunjoong","family":"Kim","sequence":"additional","affiliation":[{"name":"Yonsei University, Seoul, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5182-2667","authenticated-orcid":false,"given":"Jiwon","family":"Kim","sequence":"additional","affiliation":[{"name":"Uppsala University, Uppsala, Sweden"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7336-6295","authenticated-orcid":false,"given":"Seonghoon","family":"Park","sequence":"additional","affiliation":[{"name":"Yonsei University, Seoul, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9060-5091","authenticated-orcid":false,"given":"Hojung","family":"Cha","sequence":"additional","affiliation":[{"name":"Yonsei University, Seoul, Republic of Korea"}]}],"member":"320","published-online":{"date-parts":[[2025,9,25]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"ARCore. 2024. Camera. https:\/\/developers.google.com\/ar\/reference\/java\/com\/google\/ar\/core\/Camera."},{"key":"e_1_3_2_1_2_1","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 4009\u20134018","author":"Bhat Shariq Farooq","year":"2021","unstructured":"Shariq Farooq Bhat, Ibraheem Alhashim, and Peter Wonka. 2021. Adabins: Depth estimation using adaptive bins. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 4009\u20134018."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_49"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00218"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00135"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.350"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547826"},{"key":"e_1_3_2_1_9_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly et al. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3666025.3699339"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01532"},{"key":"e_1_3_2_1_12_1","first-page":"1","article-title":"Switch transformers: Scaling to trillion parameter models with simple and efficient sparsity","volume":"23","author":"Fedus William","year":"2022","unstructured":"William Fedus, Barret Zoph, and Noam Shazeer. 2022. Switch transformers: Scaling to trillion parameter models with simple and efficient sparsity. Journal of Machine Learning Research 23, 120 (2022), 1\u201339.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_13_1","volume-title":"Gptq: Accurate post-training quantization for generative pre-trained transformers. arXiv preprint arXiv:2210.17323","author":"Frantar Elias","year":"2022","unstructured":"Elias Frantar, Saleh Ashkboos, Torsten Hoefler, and Dan Alistarh. 2022. Gptq: Accurate post-training quantization for generative pre-trained transformers. arXiv preprint arXiv:2210.17323 (2022)."},{"key":"e_1_3_2_1_14_1","volume-title":"International conference on machine learning. PMLR, 1180\u20131189","author":"Ganin Yaroslav","year":"2015","unstructured":"Yaroslav Ganin and Victor Lempitsky. 2015. Unsupervised domain adaptation by backpropagation. In International conference on machine learning. PMLR, 1180\u20131189."},{"key":"e_1_3_2_1_15_1","unstructured":"Google. 2024. ARCore. https:\/\/developers.google.com\/ar."},{"key":"e_1_3_2_1_16_1","unstructured":"Google. 2024. GPU delegates for LiteRT. https:\/\/ai.google.dev\/edge\/litert\/performance\/gpu."},{"key":"e_1_3_2_1_17_1","unstructured":"Google. 2024. LiteRT overview. https:\/\/ai.google.dev\/edge\/litert."},{"key":"e_1_3_2_1_18_1","unstructured":"Google. 2024. Scene Semantics API. https:\/\/developers.google.com\/ar\/develop\/scene-semantics."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3636534.3690698"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3081333.3081360"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3498361.3538948"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3498361.3538932"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447993.3483274"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3636534.3690668"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3636534.3690688"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3643832.3661891"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00822"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3300061.3345455"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3498361.3538918"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3560905.3568520"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.5555\/3540261.3542413"},{"key":"e_1_3_2_1_33_1","volume-title":"Juhyun Lee, et al.","author":"Lugaresi Camillo","year":"2019","unstructured":"Camillo Lugaresi, Jiuqiang Tang, Hadon Nash, Chris McClanahan, Esha Uboweja, Michael Hays, Fan Zhang, Chuo-Ling Chang, Ming Guang Yong, Juhyun Lee, et al. 2019. Mediapipe: A framework for building perception pipelines. arXiv preprint arXiv:1906.08172 (2019)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378534"},{"key":"e_1_3_2_1_35_1","unstructured":"onnxruntime. 2024. QNN Execution Provider. https:\/\/onnxruntime.ai\/docs\/execution-providers\/QNN-ExecutionProvider.html."},{"key":"e_1_3_2_1_36_1","unstructured":"Maxime Oquab Timoth\u00e9e Darcet Th\u00e9o Moutakanni Huy Vo Marc Szafraniec Vasil Khalidov Pierre Fernandez Daniel Haziza Francisco Massa Alaaeldin El-Nouby et al. 2023. Dinov2: Learning robust visual features without supervision. arXiv preprint arXiv:2304.07193 (2023)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3218603.3218622"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3583120.3587045"},{"key":"e_1_3_2_1_39_1","unstructured":"Qualcomm. 2024. Qualcomm Linux TensorFlow Lite Runtime Reference. https:\/\/docs.qualcomm.com\/bundle\/publicresource\/topics\/80-70014-54\/overview.html."},{"key":"e_1_3_2_1_40_1","volume-title":"International conference on machine learning. PmLR, 8748\u20138763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PmLR, 8748\u20138763."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01196"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01073"},{"key":"e_1_3_2_1_43_1","volume-title":"So Kweon, and Sungha Choi.","author":"Song Junha","year":"2023","unstructured":"Junha Song, Jungsoo Lee, In So Kweon, and Sungha Choi. 2023. Ecotta: Memory-efficient continual test-time adaptation via self-distilled regularization. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 11920\u201311929."},{"key":"e_1_3_2_1_44_1","unstructured":"stereolabs. 2024. ZED 2. https:\/\/www.stereolabs.com\/products\/zed-2."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01423"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMC.2023.3255822"},{"key":"e_1_3_2_1_47_1","volume-title":"Tent: Fully test-time adaptation by entropy minimization. arXiv preprint arXiv:2006.10726","author":"Wang Dequan","year":"2020","unstructured":"Dequan Wang, Evan Shelhamer, Shaoteng Liu, Bruno Olshausen, and Trevor Darrell. 2020. Tent: Fully test-time adaptation by entropy minimization. arXiv preprint arXiv:2006.10726 (2020)."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00706"},{"key":"e_1_3_2_1_49_1","volume-title":"Region-based content enhancement for efficient video analytics at the edge. arXiv preprint arXiv:2407.16990","author":"Wang Weijun","year":"2024","unstructured":"Weijun Wang, Liang Mi, Shaowei Cen, Haipeng Dai, Yuanchun Li, Xiaoming Fu, and Yunxin Liu. 2024. Region-based content enhancement for efficient video analytics at the edge. arXiv preprint arXiv:2407.16990 (2024)."},{"key":"e_1_3_2_1_50_1","volume-title":"Proceedings of the 34th International Conference on Neural Information Processing Systems","author":"Wang Wenhui","year":"2020","unstructured":"Wenhui Wang, Furu Wei, Li Dong, Hangbo Bao, Nan Yang, and Ming Zhou. 2020. MINILM: deep self-attention distillation for task-agnostic compression of pre-trained transformers. In Proceedings of the 34th International Conference on Neural Information Processing Systems (Vancouver, BC, Canada) (NIPS '20). Curran Associates Inc., Red Hook, NY, USA, Article 485, 13 pages."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00501"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794182"},{"key":"e_1_3_2_1_53_1","volume-title":"Dee-BERT: Dynamic early exiting for accelerating BERT inference. arXiv preprint arXiv:2004.12993","author":"Xin Ji","year":"2020","unstructured":"Ji Xin, Raphael Tang, Jaejun Lee, Yaoliang Yu, and Jimmy Lin. 2020. Dee-BERT: Dynamic early exiting for accelerating BERT inference. arXiv preprint arXiv:2004.12993 (2020)."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3669940.3707239"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/3241539.3241563"},{"key":"e_1_3_2_1_56_1","volume-title":"Powerinfer-2: Fast large language model inference on a smartphone. arXiv preprint arXiv:2406.06282","author":"Xue Zhenliang","year":"2024","unstructured":"Zhenliang Xue, Yixin Song, Zeyu Mi, Xinrui Zheng, Yubin Xia, and Haibo Chen. 2024. Powerinfer-2: Fast large language model inference on a smartphone. arXiv preprint arXiv:2406.06282 (2024)."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3625687.3625793"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00987"},{"key":"e_1_3_2_1_59_1","volume-title":"Minjia Zhang, Xiaoxia Wu, Conglong Li, and Yuxiong He.","author":"Yao Zhewei","year":"2022","unstructured":"Zhewei Yao, Reza Yazdani Aminabadi, Minjia Zhang, Xiaoxia Wu, Conglong Li, and Yuxiong He. 2022. ZeroQuant: Efficient and Affordable Post-Training Quantization for Large-Scale Transformers. In Advances in Neural Information Processing Systems, S. Koyejo, S. Mohamed, A. Agarwal, D. Belgrave, K. Cho, and A. Oh (Eds.), Vol. 35. Curran Associates, Inc., 27168\u201327183. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2022\/file\/adf7fa39d65e2983d724ff7da57f00ac-Paper-Conference.pdf"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3372224.3419185"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544216.3544218"},{"key":"e_1_3_2_1_62_1","volume-title":"Edgemoe: Fast on-device inference of moe-based large language models. arXiv preprint arXiv:2308.14352","author":"Yi Rongjie","year":"2023","unstructured":"Rongjie Yi, Liwei Guo, Shiyun Wei, Ao Zhou, Shangguang Wang, and Mengwei Xu. 2023. Edgemoe: Fast on-device inference of moe-based large language models. arXiv preprint arXiv:2308.14352 (2023)."},{"key":"e_1_3_2_1_63_1","volume-title":"Sung-Ho Bae, Seungkyu Lee, and Choong Seon Hong.","author":"Zhang Chaoning","year":"2023","unstructured":"Chaoning Zhang, Dongshen Han, Yu Qiao, Jung Uk Kim, Sung-Ho Bae, Seungkyu Lee, and Choong Seon Hong. 2023. Faster segment anything: Towards lightweight sam for mobile applications. arXiv preprint arXiv:2306.14289 (2023)."},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.423"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.660"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.544"},{"key":"e_1_3_2_1_67_1","first-page":"18330","article-title":"Bert loses patience: Fast and robust inference with early exit","volume":"33","author":"Zhou Wangchunshu","year":"2020","unstructured":"Wangchunshu Zhou, Canwen Xu, Tao Ge, Julian McAuley, Ke Xu, and Furu Wei. 2020. Bert loses patience: Fast and robust inference with early exit. Advances in Neural Information Processing Systems 33 (2020), 18330\u201318341.","journal-title":"Advances in Neural Information Processing Systems"}],"event":{"name":"MobiSys '25: 23rd Annual International Conference on Mobile Systems, Applications and Services","sponsor":["SIGMOBILE ACM Special Interest Group on Mobility of Systems, Users, Data and Computing","SIGOPS ACM Special Interest Group on Operating Systems"],"location":"Hilton Anaheim Anaheim CA USA","acronym":"MobiSys '25"},"container-title":["Proceedings of the 23rd Annual International Conference on Mobile Systems, Applications and Services"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3711875.3729161","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,2]],"date-time":"2025-10-02T19:33:51Z","timestamp":1759433631000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3711875.3729161"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,23]]},"references-count":67,"alternative-id":["10.1145\/3711875.3729161","10.1145\/3711875"],"URL":"https:\/\/doi.org\/10.1145\/3711875.3729161","relation":{},"subject":[],"published":{"date-parts":[[2025,6,23]]},"assertion":[{"value":"2025-09-25","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}