{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,4]],"date-time":"2026-06-04T22:21:31Z","timestamp":1780611691305,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":60,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,6,20]],"date-time":"2026-06-20T00:00:00Z","timestamp":1781913600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62432004"],"award-info":[{"award-number":["62432004"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62272261"],"award-info":[{"award-number":["62272261"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Fundamental and Interdisciplinary Disciplines Breakthrough Plan of the Ministry of Education of China","award":["JYB2025XDXM122"],"award-info":[{"award-number":["JYB2025XDXM122"]}]},{"name":"Wuxi Research Institute of Applied Technologies, Tsinghua University","award":["20242001120"],"award-info":[{"award-number":["20242001120"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6,21]]},"DOI":"10.1145\/3745756.3809245","type":"proceedings-article","created":{"date-parts":[[2026,5,29]],"date-time":"2026-05-29T12:52:21Z","timestamp":1780059141000},"page":"883-910","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["AgentProg: Empowering Long-Horizon GUI Agents with Program-guided Context 
Management"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-3133-5332","authenticated-orcid":false,"given":"Shizuo","family":"Tian","sequence":"first","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-8450-7795","authenticated-orcid":false,"given":"Hao","family":"Wen","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-6572-1290","authenticated-orcid":false,"given":"Yuxuan","family":"Chen","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-3759-6247","authenticated-orcid":false,"given":"Jiacheng","family":"Liu","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-4205-0770","authenticated-orcid":false,"given":"Shanhui","family":"Zhao","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5959-8604","authenticated-orcid":false,"given":"Guohong","family":"Liu","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2782-183X","authenticated-orcid":false,"given":"Ju","family":"Ren","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7352-8955","authenticated-orcid":false,"given":"Yunxin","family":"Liu","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, 
China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1591-2526","authenticated-orcid":false,"given":"Yuanchun","family":"Li","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,20]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"Saaket Agashe Kyle Wong Vincent Tu Jiachen Yang Ang Li and Xin Eric Wang 2025. Agent S2: A Compositional Generalist-Specialist Framework for Computer Use Agents. arXiv:2504.00906 [cs.AI] https:\/\/arxiv.org\/abs\/2504.00906"},{"key":"e_1_3_2_2_2_1","volume-title":"Why does the effective context length of LLMs fall short? arXiv preprint arXiv:2410.18745","author":"An Chenxin","year":"2024","unstructured":"Chenxin An, Jun Zhang, Ming Zhong, Lei Li, Shansan Gong, Yao Luo, Jingjing Xu, and Lingpeng Kong. 2024. Why does the effective context length of LLMs fall short? arXiv preprint arXiv:2410.18745 (2024)."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/2906388.2906416"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"crossref","unstructured":"Kanzhi Cheng Qiushi Sun Yougang Chu Fangzhi Xu Yantao Li Jianbing Zhang and Zhiyong Wu. 2024. SeeClick: Harnessing GUI Grounding for Advanced Visual GUI Agents. arXiv:2401.10935 [cs.HC]","DOI":"10.18653\/v1\/2024.acl-long.505"},{"key":"e_1_3_2_2_5_1","volume-title":"Advancing mobile gui agents: A verifier-driven approach to practical deployment. arXiv preprint arXiv:2503.15937","author":"Dai Gaole","year":"2025","unstructured":"Gaole Dai, Shiqi Jiang, Ting Cao, Yuanchun Li, Yuqing Yang, Rui Tan, Mo Li, and Lili Qiu. 2025. Advancing mobile gui agents: A verifier-driven approach to practical deployment. arXiv preprint arXiv:2503.15937 (2025)."},{"key":"e_1_3_2_2_6_1","unstructured":"Xinzge Gao Chuanrui Hu Bin Chen and Teng Li. 2025. 
Chain-of-Memory: Enhancing GUI Agents for Cross-Application Navigation. arXiv:2506.18158 [cs.AI] https:\/\/arxiv.org\/abs\/2506.18158"},{"key":"e_1_3_2_2_7_1","unstructured":"Gonzalo Gonzalez-Pumariega Vincent Tu Chih-Lun Lee Jiachen Yang Ang Li and Xin Eric Wang. 2025. The Unreasonable Effectiveness of Scaling Agents for Computer Use. arXiv:2510.02250 [cs.AI] https:\/\/arxiv.org\/abs\/2510.02250"},{"key":"e_1_3_2_2_8_1","unstructured":"Google. 2025. Gemini 2.5 Pro - Google DeepMind. https:\/\/deepmind.google\/models\/gemini\/pro\/."},{"key":"e_1_3_2_2_9_1","volume-title":"The Thirteenth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=kxnoqaisCT","author":"Gou Boyu","year":"2025","unstructured":"Boyu Gou, Ruohan Wang, Boyuan Zheng, Yanan Xie, Cheng Chang, Yiheng Shu, Huan Sun, and Yu Su. 2025. Navigating the Digital World as Humans Do: Universal Visual Grounding for GUI Agents. In The Thirteenth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=kxnoqaisCT"},{"key":"e_1_3_2_2_10_1","unstructured":"Zhangxuan Gu Zhengwen Zeng Zhenyu Xu Xingran Zhou Shuheng Shen Yunfei Liu Beitong Zhou Changhua Meng Tianyu Xia Weizhi Chen et al. 2025. Ui-venus technical report: Building high-performance ui agents with rft. arXiv preprint arXiv:2508.10833 (2025)."},{"key":"e_1_3_2_2_11_1","volume-title":"The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=9JQtrumvg8","author":"Gur Izzeddin","year":"2024","unstructured":"Izzeddin Gur, Hiroki Furuta, Austin V Huang, Mustafa Safdari, Yutaka Matsuo, Douglas Eck, and Aleksandra Faust. 2024. A Real-World WebAgent with Planning, Long Context Understanding, and Program Synthesis. In The Twelfth International Conference on Learning Representations. 
https:\/\/openreview.net\/forum?id=9JQtrumvg8"},{"key":"e_1_3_2_2_12_1","unstructured":"Wenyi Hong Wenmeng Yu Xiaotao Gu Guo Wang Guobing Gan Haomiao Tang Jiale Cheng Ji Qi Junhui Ji Lihang Pan et al. 2025. GLM-4.1 V-Thinking: Towards Versatile Multimodal Reasoning with Scalable Reinforcement Learning. arXiv preprint arXiv:2507.01006 (2025)."},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.5555\/1643275.1643301"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3636534.3690682"},{"key":"e_1_3_2_2_15_1","unstructured":"Kuan Li Zhongwang Zhang Huifeng Yin Rui Ye Yida Zhao Liwen Zhang Litu Ou Dingchu Zhang Xixi Wu Jialong Wu et al. 2025. Websailor-v2: Bridging the chasm to proprietary agents via synthetic data and scalable reinforcement learning. arXiv preprint arXiv:2509.13305 (2025)."},{"key":"e_1_3_2_2_16_1","unstructured":"Ning Li Qiqiang Lin Zheng Wu Xiaoyun Mo Weiming Zhang Yin Zhao Xiangmou Qu Jiamu Zhou Jun Wang Congmin Zheng et al. 2025. ColorAgent: Building A Robust Personalized and Interactive OS Agent. arXiv preprint arXiv:2510.19386 (2025)."},{"key":"e_1_3_2_2_17_1","volume-title":"MobileUse: A GUI Agent with Hierarchical Reflection for Autonomous Mobile Operation. arXiv preprint arXiv:2507.16853","author":"Li Ning","year":"2025","unstructured":"Ning Li, Xiangmou Qu, Jiamu Zhou, Jun Wang, Muning Wen, Kounianhua Du, Xingyu Lou, Qiuying Peng, and Weinan Zhang. 2025. MobileUse: A GUI Agent with Hierarchical Reflection for Autonomous Mobile Operation. arXiv preprint arXiv:2507.16853 (2025)."},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3210240.3210339"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.729"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160591"},{"key":"e_1_3_2_2_21_1","volume-title":"International Conference on Learning Representations (ICLR). 
https:\/\/arxiv.org\/abs\/1802","author":"Liu Evan Zheran","year":"2018","unstructured":"Evan Zheran Liu, Kelvin Guu, Panupong Pasupat, Tianlin Shi, and Percy Liang. 2018. Reinforcement Learning on Web Interfaces using Workflow-Guided Exploration. In International Conference on Learning Representations (ICLR). https:\/\/arxiv.org\/abs\/1802.08802"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00638"},{"key":"e_1_3_2_2_23_1","volume-title":"Verigui: Verifiable long-chain gui dataset. arXiv preprint arXiv:2508.04026v1","author":"Liu Shunyu","year":"2025","unstructured":"Shunyu Liu, Minghao Liu, Huichi Zhou, Zhenyu Cui, Yang Zhou, Yuhao Zhou, Wendong Fan, Ge Zhang, Jiajun Shi, Weihao Xuan, et al. 2025. Verigui: Verifiable long-chain gui dataset. arXiv preprint arXiv:2508.04026v1 (2025)."},{"key":"e_1_3_2_2_24_1","unstructured":"Shunyu Liu Minghao Liu Huichi Zhou Zhenyu Cui Yang Zhou Yuhao Zhou Jialiang Gao Heng Zhou Yunhao Yang Wendong Fan puzhen zhang Ge Zhang Jiajun Shi Weihao Xuan Jiaxing Huang Shuang Luo Fang Wu Heli Qi Qingcheng Zeng Junjie Wang Aosong Feng Jindi Lv Sicong Jiang Ziqi Ren Wangchunshu Zhou Zhenfei Yin Wenlong Zhang Guohao Li Wenhao Yu Lei Ma Lei Bai Qunshu Lin Mingli Song and Dacheng Tao. 2026. VeriWeb: Verifiable Long-Chain Web Benchmark for Agentic Information-Seeking. arXiv preprint arXiv:2508.04026v2 (2026)."},{"key":"e_1_3_2_2_25_1","volume-title":"Madeleine Grunde-McLaughlin, et al.","author":"Mozannar Hussein","year":"2025","unstructured":"Hussein Mozannar, Gagan Bansal, Cheng Tan, Adam Fourney, Victor Dibia, Jingya Chen, Jack Gerrits, Tyler Payne, Matheus Kunzler Maldaner, Madeleine Grunde-McLaughlin, et al. 2025. Magentic-UI: Towards Human-in-the-loop Agentic Systems. 
arXiv preprint arXiv:2507.22358 (2025)."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"crossref","unstructured":"Dang Nguyen Jian Chen Yu Wang Gang Wu Namyong Park Zhengmian Hu Hanjia Lyu Junda Wu Ryan Aponte Yu Xia Xintong Li Jing Shi Hongjie Chen Viet Dac Lai Zhouhang Xie Sungchul Kim Ruiyi Zhang Tong Yu Mehrab Tanjim Nesreen K. Ahmed Puneet Mathur Seunghyun Yoon Lina Yao Branislav Kveton Jihyung Kil Thien Huu Nguyen Trung Bui Tianyi Zhou Ryan A. Rossi and Franck Dernoncourt. 2025. GUI Agents: A Survey. arXiv:2412.13501 [cs.AI] https:\/\/arxiv.org\/abs\/2412.13501","DOI":"10.18653\/v1\/2025.findings-acl.1158"},{"key":"e_1_3_2_2_27_1","volume-title":"Ui-tars: Pioneering automated gui interaction with native agents. arXiv preprint arXiv:2501.12326","author":"Qin Yujia","year":"2025","unstructured":"Yujia Qin, Yining Ye, Junjie Fang, Haoming Wang, Shihao Liang, Shizuo Tian, Junda Zhang, Jiahao Li, Yunxin Li, Shijue Huang, et al. 2025. Ui-tars: Pioneering automated gui interaction with native agents. arXiv preprint arXiv:2501.12326 (2025)."},{"key":"e_1_3_2_2_28_1","unstructured":"Christopher Rawles Sarah Clinckemaillie Yifan Chang Jonathan Waltz Gabrielle Lau Marybeth Fair Alice Li William Bishop Wei Li Folawiyo Campbell-Ajala Daniel Toyama Robert Berry Divya Tyamagundlu Timothy Lillicrap and Oriana Riva. 2024. AndroidWorld: A Dynamic Benchmarking Environment for Autonomous Agents. arXiv:2405.14573 [cs.AI] https:\/\/arxiv.org\/abs\/2405.14573"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.naacl-long.427"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3680207.3723486"},{"key":"e_1_3_2_2_31_1","volume-title":"Reflexion: Language Agents with Verbal Reinforcement Learning.","author":"Shinn Noah","year":"2023","unstructured":"Noah Shinn, Federico Cassano, Beck Labash, Ashwin Gopinath, Karthik Narasimhan, and Shunyu Yao. 2023. 
Reflexion: Language Agents with Verbal Reinforcement Learning."},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1287\/opre.26.2.282"},{"key":"e_1_3_2_2_33_1","unstructured":"Linxin Song Yutong Dai Viraj Prabhu Jieyu Zhang Taiwei Shi Li Li Junnan Li Silvio Savarese Zeyuan Chen Jieyu Zhao et al. 2025. Coact-1: Computer-using agents with coding as actions. arXiv preprint arXiv:2508.03923 (2025)."},{"key":"e_1_3_2_2_34_1","volume-title":"ColorBench: Benchmarking Mobile Agents with Graph-Structured Framework for Complex Long-Horizon Tasks. arXiv preprint arXiv:2510.14621","author":"Song Yuanyi","year":"2025","unstructured":"Yuanyi Song, Heyuan Huang, Qiqiang Lin, Yin Zhao, Xiangmou Qu, Jun Wang, Xingyu Lou, Weiwen Liu, Zhuosheng Zhang, Yong Yu, Weinan Zhang, and Zhaoxiang Wang. 2025. ColorBench: Benchmarking Mobile Agents with Graph-Structured Framework for Complex Long-Horizon Tasks. arXiv preprint arXiv:2510.14621 (2025)."},{"key":"e_1_3_2_2_35_1","volume-title":"AdaPlanner: Adaptive Planning from Feedback with Language Models. arXiv preprint arXiv:2305.16653","author":"Sun Haotian","year":"2023","unstructured":"Haotian Sun, Yuchen Zhuang, Lingkai Kong, Bo Dai, and Chao Zhang. 2023. AdaPlanner: Adaptive Planning from Feedback with Language Models. arXiv preprint arXiv:2305.16653 (2023)."},{"key":"e_1_3_2_2_36_1","volume-title":"META-GUI: Towards Multi-modal Conversational Agents on Mobile GUI. arXiv preprint arXiv:2205.11029","author":"Sun Liangtai","year":"2022","unstructured":"Liangtai Sun, Xingyu Chen, Lu Chen, Tianle Dai, Zichen Zhu, and Kai Yu. 2022. META-GUI: Towards Multi-modal Conversational Agents on Mobile GUI. arXiv preprint arXiv:2205.11029 (2022)."},{"key":"e_1_3_2_2_37_1","volume-title":"Cradle: Empowering Foundation Agents Towards General Computer Control. 
arXiv:2403.03186 [cs.AI] https:\/\/arxiv.org\/abs\/2403.03186","author":"Tan Weihao","year":"2024","unstructured":"Weihao Tan, Wentao Zhang, Xinrun Xu, Haochong Xia, Ziluo Ding, Boyu Li, Bohan Zhou, Junpeng Yue, Jiechuan Jiang, Yewen Li, Ruyi An, Molei Qin, Chuqiao Zong, Longtao Zheng, Yujie Wu, Xiaoqiang Chai, Yifei Bi, Tianbao Xie, Pengjie Gu, Xiyun Li, Ceyao Zhang, Long Tian, Chaojie Wang, Xinrun Wang, B\u00f6rje F. Karlsson, Bo An, Shuicheng Yan, and Zongqing Lu. 2024. Cradle: Empowering Foundation Agents Towards General Computer Control. arXiv:2403.03186 [cs.AI] https:\/\/arxiv.org\/abs\/2403.03186"},{"key":"e_1_3_2_2_38_1","unstructured":"Haoming Wang Haoyang Zou Huatong Song Jiazhan Feng Junjie Fang Junting Lu Longxiang Liu Qinyu Luo Shihao Liang Shijue Huang Wanjun Zhong Yining Ye Yujia Qin Yuwen Xiong Yuxin Song Zhiyong Wu Bo Li Chen Dun Chong Liu Fuxing Leng Han rui Wang Hao Yu Haobin Chen Hongyi Guo Jing Su Jingjia Huang Kai Shen Kaiyu Shi Lin Yan Pei-Xiong Zhao Pengfei Liu Qinghao Ye Renjie Zheng Wayne Xin Zhao Wen Heng Wenhao Huang Wenqian Wang Xiao-Jun Qin Yi Lin Youbing Wu Zehui Chen Zihao Wang Baoquan Zhong Xinchun Zhang Xujing Li YuanFang Li Zhongkai Zhao Chengquan Jiang Faming Wu Hao Zhou Jinlin Pang Li Han Qianli Ma Siyao Liu Songhua Cai Wenqi Fu Xin Liu Zhi Zhang Bo Zhou Guoliang Li Jiajun Shi Jiale Yang Jie Tang Li Li Taoran Lu Woyu Lin Xiao Tong Xinyao Li Yichi Zhang Yu Miao Zheng-Wang Jiang Zili Li Zi-Hao Zhao Chenxi Li Dehua Ma Feng Lin Ge Zhang Haihua Yang Hangyu Guo Hongda Zhu Jiaheng Liu Jun-Yan Du Kai Cai Kuanye Li Lichen Yuan Mei Han Minchao Wang Shuyu Guo Tianhao Cheng Xiaobo Ma Xiao Xiao Xiaolong Huang Xinjie Chen Yi-Zhen Du Yilin Chen Yiwen Wang Zhaojian Li Zhen Yang Zhiyuan Zeng Chaolin Jin Chen Li Haolin Chen Haolin Chen Jian Chen Qinghao Zhao and Guang Shi. 2025. UI-TARS-2 Technical Report: Advancing GUI Agent with Multi-Turn Reinforcement Learning. 
arXiv preprint arXiv:2509.02544 (2025)."},{"key":"e_1_3_2_2_39_1","volume-title":"Mobile-agent: Autonomous multi-modal mobile device agent with visual perception. arXiv preprint arXiv:2401.16158","author":"Wang Junyang","year":"2024","unstructured":"Junyang Wang, Haiyang Xu, Jiabo Ye, Ming Yan, Weizhou Shen, Ji Zhang, Fei Huang, and Jitao Sang. 2024. Mobile-agent: Autonomous multi-modal mobile device agent with visual perception. arXiv preprint arXiv:2401.16158 (2024)."},{"key":"e_1_3_2_2_40_1","volume-title":"MobileAgentBench: An Efficient and User-Friendly Benchmark for Mobile LLM Agents. arXiv preprint arXiv:2406.08184","author":"Wang Luyuan","year":"2024","unstructured":"Luyuan Wang, Yongyu Deng, Yiwei Zha, Guodong Mao, Qinmin Wang, Tianchen Min, Wei Chen, and Shoufa Chen. 2024. MobileAgentBench: An Efficient and User-Friendly Benchmark for Mobile LLM Agents. arXiv preprint arXiv:2406.08184 (2024)."},{"key":"e_1_3_2_2_41_1","unstructured":"Xingyao Wang Yangyi Chen Lifan Yuan Yizhe Zhang Yunzhu Li Hao Peng and Heng Ji. 2024. Executable Code Actions Elicit Better LLM Agents. In ICML. arXiv:2402.01030"},{"key":"e_1_3_2_2_42_1","volume-title":"Inducing Programmatic Skills for Agentic Tasks. In Second Conference on Language Modeling. https:\/\/openreview.net\/forum?id=lsAY6fWsog","author":"Wang Zora Zhiruo","year":"2025","unstructured":"Zora Zhiruo Wang, Apurva Gandhi, Graham Neubig, and Daniel Fried. 2025. Inducing Programmatic Skills for Agentic Tasks. In Second Conference on Language Modeling. https:\/\/openreview.net\/forum?id=lsAY6fWsog"},{"key":"e_1_3_2_2_43_1","volume-title":"Oh (Eds.)","volume":"35","author":"Wei Jason","year":"2022","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, brian ichter, Fei Xia, Ed Chi, Quoc V Le, and Denny Zhou. 2022. Chain-of-Thought Prompting Elicits Reasoning in Large Language Models. In Advances in Neural Information Processing Systems, S. Koyejo, S. Mohamed, A. Agarwal, D. Belgrave, K. Cho, and A. 
Oh (Eds.), Vol. 35. Curran Associates, Inc., 24824\u201324837. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2022\/file\/9d5609613524ecf4f15af0f7b31abca4-Paper-Conference.pdf"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3636534.3649379"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3711875.3729134"},{"key":"e_1_3_2_2_46_1","volume-title":"Os-copilot: Towards generalist computer agents with self-improvement. arXiv preprint arXiv:2402.07456","author":"Wu Zhiyong","year":"2024","unstructured":"Zhiyong Wu, Chengcheng Han, Zichen Ding, Zhenmin Weng, Zhoumianze Liu, Shunyu Yao, Tao Yu, and Lingpeng Kong. 2024. Os-copilot: Towards generalist computer agents with self-improvement. arXiv preprint arXiv:2402.07456 (2024)."},{"key":"e_1_3_2_2_47_1","volume-title":"Paul Pu Liang, et al","author":"Wu Zhiyong","year":"2024","unstructured":"Zhiyong Wu, Zhenyu Wu, Fangzhi Xu, Yian Wang, Qiushi Sun, Chengyou Jia, Kanzhi Cheng, Zichen Ding, Liheng Chen, Paul Pu Liang, et al. 2024. OSATLAS: A Foundation Action Model for Generalist GUI Agents. arXiv preprint arXiv:2410.23218 (2024)."},{"key":"e_1_3_2_2_48_1","volume-title":"On-device language models: A comprehensive review. arXiv preprint arXiv:2409.00088","author":"Xu Jiajun","year":"2024","unstructured":"Jiajun Xu, Zhiyuan Li, Wei Chen, Qun Wang, Xin Gao, Qi Cai, and Ziyuan Ling. 2024. On-device language models: A comprehensive review. arXiv preprint arXiv:2409.00088 (2024)."},{"key":"e_1_3_2_2_49_1","volume-title":"MobileRL: Online Agentic Reinforcement Learning for Mobile GUI Agents. arXiv preprint arXiv:2509.18119","author":"Xu Yifan","year":"2025","unstructured":"Yifan Xu, Xiao Liu, Xinghan Liu, Jiaqi Fu, Hanchen Zhang, Bohao Jing, Shudan Zhang, Yuting Wang, Wenyi Zhao, and Yuxiao Dong. 2025. MobileRL: Online Agentic Reinforcement Learning for Mobile GUI Agents. 
arXiv preprint arXiv:2509.18119 (2025)."},{"key":"e_1_3_2_2_50_1","unstructured":"Yifan Xu Xiao Liu Xueqiao Sun Siyi Cheng Hao Yu Hanyu Lai Shudan Zhang Dan Zhang Jie Tang and Yuxiao Dong. 2024. AndroidLab: Training and Systematic Benchmarking of Android Autonomous Agents. arXiv:2410.24024 [cs.AI] https:\/\/arxiv.org\/abs\/2410.24024"},{"key":"e_1_3_2_2_51_1","volume-title":"Wonderland: Large Multimodal Models for Zero-Shot Smartphone GUI Navigation. arXiv preprint arXiv:2311.07562","author":"Yan An","year":"2023","unstructured":"An Yan, Zhengyuan Yang, Wanrong Zhu, Kevin Lin, Linjie Li, Jianfeng Wang, Jianwei Yang, Yiwu Zhong, Julian McAuley, Jianfeng Gao, et al. 2023. GPT-4V in Wonderland: Large Multimodal Models for Zero-Shot Smartphone GUI Navigation. arXiv preprint arXiv:2311.07562 (2023)."},{"key":"e_1_3_2_2_52_1","unstructured":"Shunyu Yao Dian Yu Jeffrey Zhao Izhak Shafran Thomas L. Griffiths Yuan Cao and Karthik Narasimhan. 2023. Tree of Thoughts: Deliberate Problem Solving with Large Language Models. arXiv:2305.10601 [cs.CL]"},{"key":"e_1_3_2_2_53_1","unstructured":"Jiabo Ye Xi Zhang Haiyang Xu Haowei Liu Junyang Wang Zhaoqing Zhu Ziwei Zheng Feiyu Gao Junjie Cao Zhengxi Lu et al. 2025. Mobile-agent-v3: Fundamental agents for gui automation. arXiv preprint arXiv:2508.15144 (2025)."},{"key":"e_1_3_2_2_54_1","volume-title":"RealWebAssist: A Benchmark for Long-Horizon Web Assistance with Real-World Users. arXiv preprint arXiv:2504.10445","author":"Ye Suyu","year":"2025","unstructured":"Suyu Ye, Haojun Shi, Darren Shih, Hyokun Yun, Tanya Roosta, and Tianmin Shu. 2025. RealWebAssist: A Benchmark for Long-Horizon Web Assistance with Real-World Users. arXiv preprint arXiv:2504.10445 (2025)."},{"key":"e_1_3_2_2_55_1","volume-title":"UFO: A UI-Focused Agent for Windows OS Interaction. 
arXiv preprint arXiv:2402.07939","author":"Zhang Chaoyun","year":"2024","unstructured":"Chaoyun Zhang, Liqun Li, Shilin He, Xu Zhang, Bo Qiao, Si Qin, Minghua Ma, Yu Kang, Qingwei Lin, Saravan Rajmohan, Dongmei Zhang, and Qi Zhang. 2024. UFO: A UI-Focused Agent for Windows OS Interaction. arXiv preprint arXiv:2402.07939 (2024)."},{"key":"e_1_3_2_2_56_1","unstructured":"Chi Zhang Zhao Yang Jiaxuan Liu Yucheng Han Xin Chen Zebiao Huang Bin Fu and Gang Yu. 2023. AppAgent: Multimodal Agents as Smartphone Users. arXiv:2312.13771 [cs.CV]"},{"key":"e_1_3_2_2_57_1","volume-title":"ReCAP: Recursive Context-Aware Reasoning and Planning for Large Language Model Agents. In Conference on Neural Information Processing Systems (NeurIPS).","author":"Zhang Zhenyu","year":"2025","unstructured":"Zhenyu Zhang, Tianyi Chen, Weiran Xu, Alex Pentland, and Jiaxin Pei. 2025. ReCAP: Recursive Context-Aware Reasoning and Planning for Large Language Model Agents. In Conference on Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.186"},{"key":"e_1_3_2_2_59_1","volume-title":"Forty-first International Conference on Machine Learning (ICML'24)","author":"Zheng Boyuan","year":"2024","unstructured":"Boyuan Zheng, Boyu Gou, Jihyung Kil, Huan Sun, and Yu Su. 2024. GPT-4V(ision) is a Generalist Web Agent, if Grounded. In Forty-first International Conference on Machine Learning (ICML'24). https:\/\/openreview.net\/forum?id=piecKJ2DlB"},{"key":"e_1_3_2_2_60_1","volume-title":"Bryan Kian Hsiang Low, and Paul Pu Liang","author":"Zhou Zijian","year":"2025","unstructured":"Zijian Zhou, Ao Qu, Zhaoxuan Wu, Sunghwan Kim, Alok Prakash, Daniela Rus, Jinhua Zhao, Bryan Kian Hsiang Low, and Paul Pu Liang. 2025. MEM1: Learning to Synergize Memory and Reasoning for Efficient Long-Horizon Agents. 
https:\/\/arxiv.org\/abs\/2506.15841"}],"event":{"name":"MobiSys '26: 24th Annual International Conference on Mobile Systems, Applications and Services","location":"University of Cambridge Cambridge United Kingdom","acronym":"MobiSys '26","sponsor":["SIGMOBILE ACM Special Interest Group on Mobility of Systems, Users, Data and Computing","SIGOPS ACM Special Interest Group on Operating Systems"]},"container-title":["Proceedings of the 24th Annual International Conference on Mobile Systems, Applications and Services"],"original-title":[],"deposited":{"date-parts":[[2026,5,29]],"date-time":"2026-05-29T12:53:21Z","timestamp":1780059201000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3745756.3809245"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6,20]]},"references-count":60,"alternative-id":["10.1145\/3745756.3809245","10.1145\/3745756"],"URL":"https:\/\/doi.org\/10.1145\/3745756.3809245","relation":{},"subject":[],"published":{"date-parts":[[2026,6,20]]},"assertion":[{"value":"2026-06-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}