{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,6]],"date-time":"2025-11-06T11:46:16Z","timestamp":1762429576405,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":48,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,6,3]],"date-time":"2024-06-03T00:00:00Z","timestamp":1717372800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,6,3]]},"DOI":"10.1145\/3643832.3661892","type":"proceedings-article","created":{"date-parts":[[2024,6,4]],"date-time":"2024-06-04T17:14:23Z","timestamp":1717521263000},"page":"438-450","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["Empowering In-Browser Deep Learning Inference on Edge Through Just-In-Time Kernel Optimization"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-7125-4386","authenticated-orcid":false,"given":"Fucheng","family":"Jia","sequence":"first","affiliation":[{"name":"Central South University, Microsoft Research, Changsha, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4685-9633","authenticated-orcid":false,"given":"Shiqi","family":"Jiang","sequence":"additional","affiliation":[{"name":"Microsoft Research, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9107-013X","authenticated-orcid":false,"given":"Ting","family":"Cao","sequence":"additional","affiliation":[{"name":"Microsoft Research, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-9362-3585","authenticated-orcid":false,"given":"Wei","family":"Cui","sequence":"additional","affiliation":[{"name":"Microsoft Research, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-3335-8705","authenticated-orcid":false,"given":"Tianrui","family":"Xia","sequence":"additional","affiliation":[{"name":"University of Southern California, Microsoft Research, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-1866-1909","authenticated-orcid":false,"given":"Xu","family":"Cao","sequence":"additional","affiliation":[{"name":"Microsoft Research, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1591-2526","authenticated-orcid":false,"given":"Yuanchun","family":"Li","sequence":"additional","affiliation":[{"name":"Institute for AI Industry Research (AIR), Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1588-0293","authenticated-orcid":false,"given":"Qipeng","family":"Wang","sequence":"additional","affiliation":[{"name":"Peking University, Microsoft Research, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0298-7868","authenticated-orcid":false,"given":"Deyu","family":"Zhang","sequence":"additional","affiliation":[{"name":"Central South University, Changsha, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2782-183X","authenticated-orcid":false,"given":"Ju","family":"Ren","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7352-8955","authenticated-orcid":false,"given":"Yunxin","family":"Liu","sequence":"additional","affiliation":[{"name":"Institute for AI Industry Research (AIR), Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-8131-7439","authenticated-orcid":false,"given":"Lili","family":"Qiu","sequence":"additional","affiliation":[{"name":"UT Austin, Microsoft Research, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-6455-3898","authenticated-orcid":false,"given":"Mao","family":"Yang","sequence":"additional","affiliation":[{"name":"Microsoft Research, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2024,6,4]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Retrived in June 2023. Brain.js: GPU accelerated Neural networks in JavaScript for Browsers and Node.js. https:\/\/brain.js.org\/."},{"key":"e_1_3_2_1_2_1","unstructured":"Retrived in June 2023. The LLVM Compiler Infrastructure. https:\/\/llvm.org\/."},{"key":"e_1_3_2_1_3_1","unstructured":"Retrived in June 2023. WebAssembly. https:\/\/webassembly.org\/."},{"key":"e_1_3_2_1_4_1","unstructured":"Retrived in June 2023. WebDNN. https:\/\/mil-tokyo.github.io\/webdnn\/."},{"key":"e_1_3_2_1_5_1","unstructured":"Retrived in June 2023. WebGL: 2D and 3D graphics for the web. https:\/\/developer.mozilla.org\/en-US\/docs\/Web\/API\/WebGL_API."},{"key":"e_1_3_2_1_6_1","unstructured":"Retrived in June 2023. WebGPU-W3C Working Draft. https:\/\/www.w3.org\/TR\/webgpu\/."},{"key":"e_1_3_2_1_7_1","unstructured":"Retrived in March 2024. Antares. https:\/\/github.com\/microsoft\/antares."},{"key":"e_1_3_2_1_8_1","unstructured":"Retrived in March 2024. IREE. https:\/\/iree.dev\/."},{"key":"e_1_3_2_1_9_1","unstructured":"Retrived in March 2024. OpenXLA. https:\/\/github.com\/openxla."},{"key":"e_1_3_2_1_10_1","volume-title":"Chameleon: Adaptive Code Optimization for Expedited Deep Neural Network Compilation. In 8th International Conference on Learning Representations, ICLR 2020","author":"Ahn Byung Hoon","year":"2020","unstructured":"Byung Hoon Ahn, Prannoy Pilligundla, Amir Yazdanbakhsh, and Hadi Esmaeilzadeh. 2020. Chameleon: Adaptive Code Optimization for Expedited Deep Neural Network Compilation. In 8th International Conference on Learning Representations, ICLR 2020, Addis Ababa, Ethiopia, April 26-30, 2020. OpenReview.net. https:\/\/openreview.net\/forum?id=rygG4AVFvH"},{"key":"e_1_3_2_1_11_1","unstructured":"Binaryen. 2023. https:\/\/github.com\/WebAssembly\/binaryen"},{"key":"e_1_3_2_1_12_1","volume-title":"Proceedings of the 13th USENIX Conference on Operating Systems Design and Implementation","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, Lianmin Zheng, Eddie Yan, Meghan Cowan, Haichen Shen, Leyuan Wang, Yuwei Hu, Luis Ceze, Carlos Guestrin, and Arvind Krishnamurthy. 2018. TVM: An Automated End-to-End Optimizing Compiler for Deep Learning. In Proceedings of the 13th USENIX Conference on Operating Systems Design and Implementation (Carlsbad, CA, USA) (OSDI'18). USENIX Association, USA, 579--594."},{"key":"e_1_3_2_1_13_1","unstructured":"Brandon Jones Corentin Wallez and Fran\u00e7ois Beaufort. 2023. WebGPU: Unlocking modern GPU access in the browser. https:\/\/developer.chrome.com\/blog\/webgpuio2023\/#:~:text=WebGPU%20is%20the%20successor%20to GPU%20capabilities%20in%20the%20future."},{"key":"e_1_3_2_1_14_1","unstructured":"cuBLAS. 2023. https:\/\/docs.nvidia.com\/cuda\/cublas\/index.html"},{"key":"e_1_3_2_1_15_1","unstructured":"Eigen. 2023. https:\/\/eigen.tuxfamily.org\/index.php"},{"key":"e_1_3_2_1_16_1","unstructured":"Satya Jandhyala Emma Ning Yulong Wang. 2024. ONNX Runtime Web unleashes generative AI in the browser using WebGPU. https:\/\/cloudblogs.microsoft.com\/opensource\/2024\/02\/29\/onnx-runtime-web-unleashes-generative-ai-in-the-browser-using-webgpu\/"},{"key":"e_1_3_2_1_17_1","unstructured":"Emscripten. 2023. https:\/\/emscripten.org\/docs\/introducing_emscripten\/about_emscripten.html"},{"key":"e_1_3_2_1_18_1","unstructured":"V8 Engine. 2023. https:\/\/v8.dev\/"},{"key":"e_1_3_2_1_19_1","unstructured":"OpenGL ES. 2023. https:\/\/www.khronos.org\/opengles\/"},{"volume-title":"Achieving Zero-COGS with Microsoft Editor","author":"Ge Tao","key":"e_1_3_2_1_20_1","unstructured":"Tao Ge, Ting Cao, Siqing Chen, and Qiong Ning. 2023. Achieving Zero-COGS with Microsoft Editor Neural Grammar Checker. https:\/\/www.microsoft.com\/en-us\/research\/blog\/achieving-zero-cogs-with-microsoft-editor-neural-grammar-checker\/."},{"key":"e_1_3_2_1_21_1","unstructured":"Google. 2024. Why On-Device Machine Learning? https:\/\/developers.google.com\/learn\/topics\/on-device-ml\/learn-more"},{"key":"e_1_3_2_1_22_1","unstructured":"Thales Group. 2023. Cloud Assets the Biggest Targets for Cyberattacks as Data Breaches Increase. https:\/\/www.thalesgroup.com\/en\/worldwide\/security\/press_release\/cloud-assets-biggest-targets-cyberattacks-data-breaches-increase."},{"key":"e_1_3_2_1_23_1","unstructured":"WebAssembly Community Group. 2023. WebAssembly Specification Release 2.0."},{"key":"e_1_3_2_1_24_1","unstructured":"Huggingface. 2023. https:\/\/huggingface.co\/docs\/transformers.js\/index"},{"key":"e_1_3_2_1_25_1","unstructured":"JavaScript. 2023. https:\/\/www.w3.org\/standards\/"},{"key":"e_1_3_2_1_26_1","unstructured":"Khronos. 2023. SPIR overview. https:\/\/www.khronos.org\/spir\/"},{"key":"e_1_3_2_1_27_1","volume-title":"BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension. CoRR abs\/1910.13461","author":"Lewis Mike","year":"2019","unstructured":"Mike Lewis, Yinhan Liu, Naman Goyal, Marjan Ghazvininejad, Abdelrahman Mohamed, Omer Levy, Veselin Stoyanov, and Luke Zettlemoyer. 2019. BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension. CoRR abs\/1910.13461 (2019). arXiv:1910.13461 http:\/\/arxiv.org\/abs\/1910.13461"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3498361.3538922"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3495243.3517020"},{"key":"e_1_3_2_1_30_1","volume-title":"RoBERTa: A Robustly Optimized BERT Pretraining Approach. CoRR abs\/1907.11692","author":"Liu Yinhan","year":"2019","unstructured":"Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin Stoyanov. 2019. RoBERTa: A Robustly Optimized BERT Pretraining Approach. CoRR abs\/1907.11692 (2019). arXiv:1907.11692 http:\/\/arxiv.org\/abs\/1907.11692"},{"key":"e_1_3_2_1_31_1","unstructured":"Web LLM. Jul 2023. Web LLM. https:\/\/github.com\/mlc-ai\/web-llm."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313639"},{"key":"e_1_3_2_1_33_1","unstructured":"OpenGL. 2023. https:\/\/www.opengl.org\/"},{"key":"e_1_3_2_1_34_1","unstructured":"Alec Radford Jeff Wu Rewon Child David Luan Dario Amodei and Ilya Sutskever. 2019. Language Models are Unsupervised Multitask Learners. (2019)."},{"key":"e_1_3_2_1_35_1","first-page":"1","article-title":"2020. Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer","volume":"21","author":"Raffel Colin","year":"2020","unstructured":"Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, and Peter J. Liu. 2020. Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer. J. Mach. Learn. Res. 21 (2020), 140:1--140:67. http:\/\/jmlr.org\/papers\/v21\/20-074.html","journal-title":"J. Mach. Learn. Res."},{"key":"e_1_3_2_1_36_1","unstructured":"Statista. 2023. https:\/\/www.statista.com\/topics\/5684\/web-browsers\/#topicOverview"},{"key":"e_1_3_2_1_37_1","unstructured":"Steam. 2023. https:\/\/store.steampowered.com\/hwsurvey\/Steam-Hardware-Software-Survey-Welcome-to-Steam"},{"key":"e_1_3_2_1_38_1","unstructured":"TensorFlow. 2023. https:\/\/www.tensorflow.org\/"},{"key":"e_1_3_2_1_39_1","unstructured":"TensorFlow.js. 2023. https:\/\/www.tensorflow.org\/js"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3315508.3329973"},{"key":"e_1_3_2_1_41_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale Dan Bikel Lukas Blecher Cristian Canton Ferrer Moya Chen Guillem Cucurull David Esiobu Jude Fernandes Jeremy Fu Wenyin Fu Brian Fuller Cynthia Gao Vedanuj Goswami Naman Goyal Anthony Hartshorn Saghar Hosseini Rui Hou Hakan Inan Marcin Kardas Viktor Kerkez Madian Khabsa Isabel Kloumann Artem Korenev Punit Singh Koura Marie-Anne Lachaux Thibaut Lavril Jenya Lee Diana Liskovich Yinghai Lu Yuning Mao Xavier Martinet Todor Mihaylov Pushkar Mishra Igor Molybog Yixin Nie Andrew Poulton Jeremy Reizenstein Rashi Rungta Kalyan Saladi Alan Schelten Ruan Silva Eric Michael Smith Ranjan Subramanian Xiaoqing Ellen Tan Binh Tang Ross Taylor Adina Williams Jian Xiang Kuan Puxin Xu Zheng Yan Iliyan Zarov Yuchen Zhang Angela Fan Melanie Kambadur Sharan Narang Aurelien Rodriguez Robert Stojnic Sergey Edunov and Thomas Scialom. 2023. Llama 2: Open Foundation and Fine-Tuned Chat Models. arXiv:2307.09288 [cs.CL]"},{"key":"e_1_3_2_1_42_1","unstructured":"Vulkan. 2023. https:\/\/www.vulkan.org\/"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447993.3448625"},{"key":"e_1_3_2_1_44_1","unstructured":"Qipeng Wang Shiqi Jiang Zhenpeng Chen Xu Cao Yuanchun Li Aoyu Li Ying Zhang Yun Ma Ting Cao and Xuanzhe Liu. 2024. Exploring the Impact of InBrowser Deep Learning Inference on Quality of User Experience and Performance. arXiv:2402.05981 [cs.LG]"},{"key":"e_1_3_2_1_45_1","unstructured":"ONNX Runtime Web. 2023. https:\/\/onnxruntime.ai\/docs\/tutorials\/web\/"},{"key":"e_1_3_2_1_46_1","volume-title":"Proceedings of the 14th USENIX Conference on Operating Systems Design and Implementation (OSDI'20)","author":"Zheng Lianmin","year":"2020","unstructured":"Lianmin Zheng, Chengfan Jia, Minmin Sun, Zhao Wu, Cody Hao Yu, Ameer Haj-Ali, Yida Wang, Jun Yang, Danyang Zhuo, Koushik Sen, Joseph E. Gonzalez, and Ion Stoica. 2020. Ansor: Generating High-Performance Tensor Programs for Deep Learning. In Proceedings of the 14th USENIX Conference on Operating Systems Design and Implementation (OSDI'20). USENIX Association, USA, Article 49, 17 pages."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378508"},{"key":"e_1_3_2_1_48_1","volume-title":"ROLLER: Fast and Efficient Tensor Compilation for Deep Learning. In 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22)","author":"Zhu Hongyu","year":"2022","unstructured":"Hongyu Zhu, Ruofan Wu, Yijia Diao, Shanbin Ke, Haoyu Li, Chen Zhang, Jilong Xue, Lingxiao Ma, Yuqing Xia, Wei Cui, Fan Yang, Mao Yang, Lidong Zhou, Asaf Cidon, and Gennady Pekhimenko. 2022. ROLLER: Fast and Efficient Tensor Compilation for Deep Learning. In 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22). USENIX Association, Carlsbad, CA, 233--248. https:\/\/www.usenix.org\/conference\/osdi22\/presentation\/zhu"}],"event":{"name":"MOBISYS '24: 22nd Annual International Conference on Mobile Systems, Applications and Services","sponsor":["SIGMOBILE ACM Special Interest Group on Mobility of Systems, Users, Data and Computing","SIGOPS ACM Special Interest Group on Operating Systems"],"location":"Minato-ku, Tokyo Japan","acronym":"MOBISYS '24"},"container-title":["Proceedings of the 22nd Annual International Conference on Mobile Systems, Applications and Services"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3643832.3661892","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3643832.3661892","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:03:07Z","timestamp":1750291387000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3643832.3661892"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,3]]},"references-count":48,"alternative-id":["10.1145\/3643832.3661892","10.1145\/3643832"],"URL":"https:\/\/doi.org\/10.1145\/3643832.3661892","relation":{},"subject":[],"published":{"date-parts":[[2024,6,3]]},"assertion":[{"value":"2024-06-04","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}