{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T18:58:20Z","timestamp":1772823500891,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":11,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,6,3]],"date-time":"2024-06-03T00:00:00Z","timestamp":1717372800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100006374","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62325201"],"award-info":[{"award-number":["62325201"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,6,3]]},"DOI":"10.1145\/3662006.3662066","type":"proceedings-article","created":{"date-parts":[[2024,6,11]],"date-time":"2024-06-11T12:23:36Z","timestamp":1718108616000},"page":"33-35","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["WiP: Efficient LLM Prefilling with Mobile NPU"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6775-0688","authenticated-orcid":false,"given":"Daliang","family":"Xu","sequence":"first","affiliation":[{"name":"Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-1107-4688","authenticated-orcid":false,"given":"Hao","family":"Zhang","sequence":"additional","affiliation":[{"name":"Beijing jiaotong Univsersity, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-1163-028X","authenticated-orcid":false,"given":"Liming","family":"Yang","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-3188-0976","authenticated-orcid":false,"given":"Ruiqi","family":"Liu","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6271-6993","authenticated-orcid":false,"given":"Mengwei","family":"Xu","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7908-8484","authenticated-orcid":false,"given":"Xuanzhe","family":"Liu","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2024,6,11]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2023. Gboard - the Google Keyboard - Apps on Google Play --- play.google.com. https:\/\/play.google.com\/store\/apps\/details?id=com.google.android.inputmethod.latin&hl=en."},{"key":"e_1_3_2_1_2_1","unstructured":"2023. Offlice copilot. https:\/\/blogs.microsoft.com\/blog\/2023\/03\/16\/introducing-microsoft-365-copilot-your- copilot- for- work\/."},{"key":"e_1_3_2_1_3_1","unstructured":"2023. Siri --- apple.com. https:\/\/www.apple.com\/siri\/."},{"key":"e_1_3_2_1_4_1","unstructured":"2023. Snapdragon 8gen3 SoC. https:\/\/www.qualcomm.com\/products\/mobile\/snapdragon\/smartphones\/snapdragon-8-series-mobile-platforms\/snapdragon-8-gen-3-mobile-platform."},{"key":"e_1_3_2_1_5_1","unstructured":"2024. LlamaTouch. https:\/\/github.com\/LlamaTouch\/LlamaTouch."},{"key":"e_1_3_2_1_6_1","volume-title":"Sarathi: Efficient llm inference by piggybacking decodes with chunked prefills. arXiv preprint arXiv:2308.16369","author":"Agrawal Amey","year":"2023","unstructured":"Amey Agrawal, Ashish Panwar, Jayashree Mohan, Nipun Kwatra, Bhargav S Gulavani, and Ramachandran Ramjee. 2023. Sarathi: Efficient llm inference by piggybacking decodes with chunked prefills. arXiv preprint arXiv:2308.16369 (2023)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3580895"},{"key":"e_1_3_2_1_8_1","volume-title":"Shiqi Jiang, Yunhao Liu, Yaqin Zhang, and Yunxin Liu.","author":"Wen Hao","year":"2023","unstructured":"Hao Wen, Yuanchun Li, Guohong Liu, Shanhui Zhao, Tao Yu, Toby Jia-Jun Li, Shiqi Jiang, Yunhao Liu, Yaqin Zhang, and Yunxin Liu. 2023. Empowering llm to use smartphone for intelligent task automation. arXiv preprint arXiv:2308.15272 (2023)."},{"key":"e_1_3_2_1_9_1","unstructured":"Daliang Xu Mengwei Xu Chiheng Lou Li Zhang Gang Huang Xin Jin and Xuanzhe Liu. 2024. SoCFlow: Efficient and Scalable DNN Training on SoC-Clustered Edge Servers. (2024)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3495243.3560545"},{"key":"e_1_3_2_1_11_1","volume-title":"LLM-PQ: Serving LLM on Heterogeneous Clusters with Phase-Aware Partition and Adaptive Quantization. arXiv preprint arXiv:2403.01136","author":"Zhao Juntao","year":"2024","unstructured":"Juntao Zhao, Borui Wan, Yanghua Peng, Haibin Lin, and Chuan Wu. 2024. LLM-PQ: Serving LLM on Heterogeneous Clusters with Phase-Aware Partition and Adaptive Quantization. arXiv preprint arXiv:2403.01136 (2024)."}],"event":{"name":"MOBISYS '24: The 22nd Annual International Conference on Mobile Systems, Applications and Services","location":"Minato-ku Tokyo Japan","acronym":"MOBISYS '24","sponsor":["SIGMOBILE ACM Special Interest Group on Mobility of Systems, Users, Data and Computing","SIGOPS ACM Special Interest Group on Operating Systems"]},"container-title":["Proceedings of the Workshop on Edge and Mobile Foundation Models"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3662006.3662066","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3662006.3662066","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T20:18:23Z","timestamp":1755980303000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3662006.3662066"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,3]]},"references-count":11,"alternative-id":["10.1145\/3662006.3662066","10.1145\/3662006"],"URL":"https:\/\/doi.org\/10.1145\/3662006.3662066","relation":{},"subject":[],"published":{"date-parts":[[2024,6,3]]},"assertion":[{"value":"2024-06-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}