{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T20:40:01Z","timestamp":1755981601805,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":13,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,6,3]],"date-time":"2024-06-03T00:00:00Z","timestamp":1717372800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,6,3]]},"DOI":"10.1145\/3662006.3662062","type":"proceedings-article","created":{"date-parts":[[2024,6,11]],"date-time":"2024-06-11T12:23:36Z","timestamp":1718108616000},"page":"16-17","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["WiP: A Solution for Reducing MLLM-Based Agent Interaction Overhead"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7427-1688","authenticated-orcid":false,"given":"Wenjie","family":"Li","sequence":"first","affiliation":[{"name":"Fields Lab, Huawei, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-1749-8785","authenticated-orcid":false,"given":"Xiaoyang","family":"Liu","sequence":"additional","affiliation":[{"name":"Fields Lab, Huawei, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-2020-3910","authenticated-orcid":false,"given":"Zihao","family":"Zheng","sequence":"additional","affiliation":[{"name":"Fields Lab, Huawei, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-9500-1008","authenticated-orcid":false,"given":"Jishun","family":"Wang","sequence":"additional","affiliation":[{"name":"Fields Lab, Huawei, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3750-6094","authenticated-orcid":false,"given":"Kang","family":"Ling","sequence":"additional","affiliation":[{"name":"Fields Lab, Huawei Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6984-3220","authenticated-orcid":false,"given":"Ming","family":"Fu","sequence":"additional","affiliation":[{"name":"Fields Lab, Huawei Hangzhou, China"}]}],"member":"320","published-online":{"date-parts":[[2024,6,11]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3126594.3126651"},{"key":"e_1_3_2_1_2_1","unstructured":"OpenAI. 2021. ChatGPT. https:\/\/openai.com\/research\/chatgpt."},{"key":"e_1_3_2_1_3_1","unstructured":"OpenAI. 2023. GPT-4 Technical Report. arXiv:2303.08774 [cs.CL]"},{"key":"e_1_3_2_1_4_1","volume-title":"Gemini: A Family of Highly Capable Multimodal Models. arXiv:2312.11805 [cs.CL]","author":"Rohan Anil Oriol Vinyals","year":"2024","unstructured":"Oriol Vinyals Rohan Anil, Jeffrey Dean. 2024. Gemini: A Family of Highly Capable Multimodal Models. arXiv:2312.11805 [cs.CL]"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"crossref","unstructured":"Robin Rombach Andreas Blattmann Dominik Lorenz Patrick Esser and Bj\u00f6rn Ommer. 2021. High-Resolution Image Synthesis with Latent Diffusion Models. arXiv:2112.10752 [cs.CV]","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_6_1","volume-title":"Mobile-Agent: Autonomous Multi-Modal Mobile Device Agent with Visual Perception. arXiv preprint arXiv:2401.16158","author":"Wang Junyang","year":"2024","unstructured":"Junyang Wang, Haiyang Xu, Jiabo Ye, Ming Yan, Weizhou Shen, Ji Zhang, Fei Huang, and Jitao Sang. 2024. Mobile-Agent: Autonomous Multi-Modal Mobile Device Agent with Visual Perception. arXiv preprint arXiv:2401.16158 (2024)."},{"key":"e_1_3_2_1_7_1","volume-title":"Shiqi Jiang, Yunhao Liu, Yaqin Zhang, and Yunxin Liu.","author":"Wen Hao","year":"2024","unstructured":"Hao Wen, Yuanchun Li, Guohong Liu, Shanhui Zhao, Tao Yu, Toby Jia-Jun Li, Shiqi Jiang, Yunhao Liu, Yaqin Zhang, and Yunxin Liu. 2024. AutoDroid: LLM-powered Task Automation in Android. arXiv:2308.15272 [cs.AI]"},{"key":"e_1_3_2_1_8_1","volume-title":"Visual ChatGPT: Talking, Drawing and Editing with Visual Foundation Models. ArXiv abs\/2303.04671","author":"Wu Chenfei","year":"2023","unstructured":"Chenfei Wu, Sheng-Kai Yin, Weizhen Qi, Xiaodong Wang, Zecheng Tang, and Nan Duan. 2023. Visual ChatGPT: Talking, Drawing and Editing with Visual Foundation Models. ArXiv abs\/2303.04671 (2023)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3581158"},{"key":"e_1_3_2_1_10_1","unstructured":"Qinghao Ye Haiyang Xu Jiabo Ye Ming Yan Anwen Hu Haowei Liu Qi Qian Ji Zhang Fei Huang and Jingren Zhou. 2023. mPLUG-Owl2: Revolutionizing Multi-modal Large Language Model with Modality Collaboration. arXiv:2311.04257 [cs.CL]"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01179"},{"key":"e_1_3_2_1_12_1","volume-title":"UFO: A UI-Focused Agent for Windows OS Interaction. arXiv preprint arXiv:2402.07939","author":"Zhang Chaoyun","year":"2024","unstructured":"Chaoyun Zhang, Liqun Li, Shilin He, Xu Zhang, Bo Qiao, Si Qin, Minghua Ma, Yu Kang, Qingwei Lin, Saravan Rajmohan, Dongmei Zhang, and Qi Zhang. 2024. UFO: A UI-Focused Agent for Windows OS Interaction. arXiv preprint arXiv:2402.07939 (2024)."},{"key":"e_1_3_2_1_13_1","unstructured":"Chi Zhang Zhao Yang Jiaxuan Liu Yucheng Han Xin Chen Zebiao Huang Bin Fu and Gang Yu. 2023. AppAgent: Multimodal Agents as Smartphone Users. arXiv:2312.13771 [cs.CV]"}],"event":{"name":"MOBISYS '24: The 22nd Annual International Conference on Mobile Systems, Applications and Services","sponsor":["SIGMOBILE ACM Special Interest Group on Mobility of Systems, Users, Data and Computing","SIGOPS ACM Special Interest Group on Operating Systems"],"location":"Minato-ku Tokyo Japan","acronym":"MOBISYS '24"},"container-title":["Proceedings of the Workshop on Edge and Mobile Foundation Models"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3662006.3662062","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3662006.3662062","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T20:18:27Z","timestamp":1755980307000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3662006.3662062"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,3]]},"references-count":13,"alternative-id":["10.1145\/3662006.3662062","10.1145\/3662006"],"URL":"https:\/\/doi.org\/10.1145\/3662006.3662062","relation":{},"subject":[],"published":{"date-parts":[[2024,6,3]]},"assertion":[{"value":"2024-06-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}