{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,18]],"date-time":"2026-07-18T02:36:18Z","timestamp":1784342178535,"version":"3.55.0"},"publisher-location":"New York, NY, USA","reference-count":81,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,11]],"date-time":"2024-10-11T00:00:00Z","timestamp":1728604800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,13]]},"DOI":"10.1145\/3654777.3676356","type":"proceedings-article","created":{"date-parts":[[2024,10,11]],"date-time":"2024-10-11T10:50:36Z","timestamp":1728643836000},"page":"1-17","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":18,"title":["GPTVoiceTasker: Advancing Multi-step Mobile Task Efficiency Through Dynamic Interface Exploration and Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4798-8701","authenticated-orcid":false,"given":"Minh Duc","family":"Vu","sequence":"first","affiliation":[{"name":"CSIRO's Data61, Australia"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7862-6677","authenticated-orcid":false,"given":"Han","family":"Wang","sequence":"additional","affiliation":[{"name":"Faculty of IT, Monash University, Australia"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2700-7478","authenticated-orcid":false,"given":"Jieshan","family":"Chen","sequence":"additional","affiliation":[{"name":"CSIRO's Data61, Australia"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9808-9992","authenticated-orcid":false,"given":"Zhuang","family":"Li","sequence":"additional","affiliation":[{"name":"Faculty of IT, Monash University, Australia"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7971-3107","authenticated-orcid":false,"given":"Shengdong","family":"Zhao","sequence":"additional","affiliation":[{"name":"School of Creative Media, City University of Hong Kong, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7663-1421","authenticated-orcid":false,"given":"Zhenchang","family":"Xing","sequence":"additional","affiliation":[{"name":"CSIRO's Data61 &amp; Australian National University, Australia"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2011-9618","authenticated-orcid":false,"given":"Chunyang","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Computation, Information and Technology, Technical University of Munich, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,10,11]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"Voice Access. 2022. Troubleshoot Voice Access. https:\/\/support.google.com\/accessibility\/android\/answer\/6377053?hl=en#:\u00a0:text=If%20you%20have%20trouble%20starting Access%20from%20the%20lock%20screen."},{"key":"e_1_3_2_2_2_1","unstructured":"Apple. [n. d.]. Siri. https:\/\/www.apple.com\/au\/siri\/"},{"key":"e_1_3_2_2_3_1","volume-title":"App-Based Task Shortcuts for Virtual Assistants. In The 34th Annual ACM Symposium on User Interface Software and Technology. 1089\u20131099","author":"Arsan Deniz","year":"2021","unstructured":"Deniz Arsan, Ali Zaidi, Aravind Sagar, and Ranjitha Kumar. 2021. App-Based Task Shortcuts for Virtual Assistants. In The 34th Annual ACM Symposium on User Interface Software and Technology. 1089\u20131099."},{"key":"e_1_3_2_2_4_1","unstructured":"Google Assistant. 2022. Assistant. https:\/\/assistant.google.com\/intl\/en_au\/platforms\/phones\/"},{"key":"e_1_3_2_2_5_1","volume-title":"3rd International Conference on Learning Representations, ICLR","author":"Bahdanau Dzmitry","year":"2015","unstructured":"Dzmitry Bahdanau, Kyung\u00a0Hyun Cho, and Yoshua Bengio. 2015. Neural machine translation by jointly learning to align and translate. In 3rd International Conference on Learning Representations, ICLR 2015."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3529299.3531493"},{"key":"e_1_3_2_2_7_1","volume-title":"An empirical evaluation of the system usability scale. Intl. Journal of Human\u2013Computer Interaction 24, 6","author":"Bangor Aaron","year":"2008","unstructured":"Aaron Bangor, Philip\u00a0T Kortum, and James\u00a0T Miller. 2008. An empirical evaluation of the system usability scale. Intl. Journal of Human\u2013Computer Interaction 24, 6 (2008), 574\u2013594."},{"key":"e_1_3_2_2_8_1","volume-title":"Code generation tools (almost) for free? a study of few-shot, pre-trained language models on code. arXiv preprint arXiv:2206.01335","author":"Barei\u00df Patrick","year":"2022","unstructured":"Patrick Barei\u00df, Beatriz Souza, Marcelo d\u2019Amorim, and Michael Pradel. 2022. Code generation tools (almost) for free? a study of few-shot, pre-trained language models on code. arXiv preprint arXiv:2206.01335 (2022)."},{"key":"e_1_3_2_2_9_1","article-title":"Smart Voice Assistant: a universal voice control solution for non-visual access to the Android operating system","volume":"4","author":"Bhalerao Aditi","year":"2017","unstructured":"Aditi Bhalerao, Samira Bhilare, Anagha Bondade, and Monal Shingade. 2017. Smart Voice Assistant: a universal voice control solution for non-visual access to the Android operating system. Int. Res. J. Eng. Technol 4, 2 (2017).","journal-title":"Int. Res. J. Eng. Technol"},{"key":"e_1_3_2_2_10_1","volume-title":"Language models are few-shot learners. Advances in neural information processing systems 33","author":"Brown Tom","year":"2020","unstructured":"Tom Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared\u00a0D Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, 2020. Language models are few-shot learners. Advances in neural information processing systems 33 (2020), 1877\u20131901."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20074-8_18"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445409"},{"key":"e_1_3_2_2_13_1","volume-title":"Revolutionizing Mobile Interaction: Enabling a 3 Billion Parameter GPT LLM on Mobile. arXiv preprint arXiv:2310.01434","author":"Carreira Samuel","year":"2023","unstructured":"Samuel Carreira, Tom\u00e1s Marques, Jos\u00e9 Ribeiro, and Carlos Grilo. 2023. Revolutionizing Mobile Interaction: Enabling a 3 Billion Parameter GPT LLM on Mobile. arXiv preprint arXiv:2310.01434 (2023)."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3502073"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-40744-4_2"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE.2019.00070"},{"key":"e_1_3_2_2_17_1","volume-title":"Program of thoughts prompting: Disentangling computation from reasoning for numerical reasoning tasks. arXiv preprint arXiv:2211.12588","author":"Chen Wenhu","year":"2022","unstructured":"Wenhu Chen, Xueguang Ma, Xinyi Wang, and William\u00a0W Cohen. 2022. Program of thoughts prompting: Disentangling computation from reasoning for numerical reasoning tasks. arXiv preprint arXiv:2211.12588 (2022)."},{"key":"e_1_3_2_2_18_1","volume-title":"Mind2web: Towards a generalist agent for the web. Advances in Neural Information Processing Systems 36","author":"Deng Xiang","year":"2024","unstructured":"Xiang Deng, Yu Gu, Boyuan Zheng, Shijie Chen, Sam Stevens, Boshi Wang, Huan Sun, and Yu Su. 2024. Mind2web: Towards a generalist agent for the web. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_2_19_1","volume-title":"Wiley StatsRef: Statistics Reference Online","author":"DePuy Venita","year":"2014","unstructured":"Venita DePuy and Vance\u00a0W Berger. 2014. Counterbalancing. Wiley StatsRef: Statistics Reference Online (2014)."},{"key":"e_1_3_2_2_20_1","unstructured":"Amazon Developer. 2024. Build LLM-powered Alexa experiences. https:\/\/developer.amazon.com\/en-US\/alexa\/alexa-ai"},{"key":"e_1_3_2_2_21_1","unstructured":"Android Developers. 2022. AccessibilityNodeInfo. https:\/\/developer.android.com\/reference\/android\/view\/accessibility\/AccessibilityNodeInfo.AccessibilityAction"},{"key":"e_1_3_2_2_22_1","unstructured":"Android Developers. 2022. AccessibilityService. https:\/\/developer.android.com\/guide\/topics\/ui\/accessibility\/service"},{"key":"e_1_3_2_2_23_1","unstructured":"Apple Developers. 2024. Handle SiriKit intents in an Intents extension. https:\/\/developer.apple.com\/documentation\/xcode\/configuring-siri-support#Handle-SiriKit-intents-in-an-Intents-extension"},{"key":"e_1_3_2_2_24_1","unstructured":"Google Developers. 2024. Custom Intents. https:\/\/developers.google.com\/assistant\/app\/custom-intents"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3236024.3236045"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3472749.3474795"},{"key":"e_1_3_2_2_27_1","volume-title":"Prompting Is All Your Need: Automated Android Bug Replay with Large Language Models. arXiv preprint arXiv:2306.01987","author":"Feng Sidong","year":"2023","unstructured":"Sidong Feng and Chunyang Chen. 2023. Prompting Is All Your Need: Automated Android Bug Replay with Large Language Models. arXiv preprint arXiv:2306.01987 (2023)."},{"key":"e_1_3_2_2_28_1","volume-title":"Large language model AI chatbots require approval as medical devices. Nature Medicine","author":"Gilbert Stephen","year":"2023","unstructured":"Stephen Gilbert, Hugh Harvey, Tom Melvin, Erik Vollebregt, and Paul Wicks. 2023. Large language model AI chatbots require approval as medical devices. Nature Medicine (2023), 1\u20133."},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1287\/opre.24.6.1164"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1037\/e577632012-009"},{"key":"e_1_3_2_2_31_1","volume-title":"Advances in natural language processing. Science 349, 6245","author":"Hirschberg Julia","year":"2015","unstructured":"Julia Hirschberg and Christopher\u00a0D. Manning. 2015. Advances in natural language processing. Science 349, 6245 (2015), 261\u2013266."},{"key":"e_1_3_2_2_32_1","volume-title":"an introduction to voice assistants. Medical reference services quarterly 37, 1","author":"Hoy B","year":"2018","unstructured":"Matthew\u00a0B Hoy. 2018. Alexa, Siri, Cortana, and more: an introduction to voice assistants. Medical reference services quarterly 37, 1 (2018), 81\u201388."},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3586183.3606833"},{"key":"e_1_3_2_2_34_1","volume-title":"Privacy attitudes and privacy behaviour: A review of current research on the privacy paradox phenomenon. Computers & security 64","author":"Kokolakis Spyros","year":"2017","unstructured":"Spyros Kokolakis. 2017. Privacy attitudes and privacy behaviour: A review of current research on the privacy paradox phenomenon. Computers & security 64 (2017), 122\u2013134."},{"key":"e_1_3_2_2_35_1","volume-title":"1st Symposium on Simplicity in Algorithms (SOSA","author":"Kopelowitz Tsvi","year":"2018","unstructured":"Tsvi Kopelowitz and Ely Porat. 2018. A simple algorithm for approximating the text-to-pattern hamming distance. In 1st Symposium on Simplicity in Algorithms (SOSA 2018). Schloss Dagstuhl-Leibniz-Zentrum fuer Informatik."},{"key":"e_1_3_2_2_36_1","first-page":"163","article-title":"Zigbee based voice control system for smart home","volume":"3","author":"Krishna Y\u00a0Bala","year":"2012","unstructured":"Y\u00a0Bala Krishna and S Nagendram. 2012. Zigbee based voice control system for smart home. International Journal on Computer Technology and Applications 3, 1 (2012), 163\u2013168.","journal-title":"International Journal on Computer Technology and Applications"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/VL\/HCC53370.2022.9833005"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3106237.3117769"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642230"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3502042"},{"key":"e_1_3_2_2_41_1","volume-title":"Proceedings of the 2017 CHI conference on human factors in computing systems. 6038\u20136049","author":"Jia-Jun Li Toby","year":"2017","unstructured":"Toby Jia-Jun Li, Amos Azaria, and Brad\u00a0A Myers. 2017. SUGILITE: creating multimodal smartphone automation by demonstration. In Proceedings of the 2017 CHI conference on human factors in computing systems. 6038\u20136049."},{"key":"e_1_3_2_2_42_1","volume-title":"Mapping natural language instructions to mobile UI action sequences. arXiv preprint arXiv:2005.03776","author":"Li Yang","year":"2020","unstructured":"Yang Li, Jiacong He, Xin Zhou, Yuan Zhang, and Jason Baldridge. 2020. Mapping natural language instructions to mobile UI action sequences. arXiv preprint arXiv:2005.03776 (2020)."},{"key":"e_1_3_2_2_43_1","volume-title":"Lang2LTL: Translating Natural Language Commands to Temporal Robot Task Specification. arXiv preprint arXiv:2302.11649","author":"Liu Jason\u00a0Xinyu","year":"2023","unstructured":"Jason\u00a0Xinyu Liu, Ziyi Yang, Ifrah Idrees, Sam Liang, Benjamin Schornstein, Stefanie Tellex, and Ankit Shah. 2023. Lang2LTL: Translating Natural Language Commands to Temporal Robot Task Specification. arXiv preprint arXiv:2302.11649 (2023)."},{"key":"e_1_3_2_2_44_1","volume-title":"Voice helper: A mobile assistive system for visually impaired persons. In 2015 IEEE International Conference on Computer and Information Technology","author":"Liu Kuei-Chun","unstructured":"Kuei-Chun Liu, Ching-Hung Wu, Shau-Yin Tseng, and Yin-Te Tsai. 2015. Voice helper: A mobile assistive system for visually impaired persons. In 2015 IEEE International Conference on Computer and Information Technology; Ubiquitous Computing and Communications; Dependable, Autonomic and Secure Computing; Pervasive Intelligence and Computing. IEEE, 1400\u20131405."},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380242"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3501825"},{"key":"e_1_3_2_2_47_1","volume-title":"Agentbench: Evaluating llms as agents. arXiv preprint arXiv:2308.03688","author":"Liu Xiao","year":"2023","unstructured":"Xiao Liu, Hao Yu, Hanchen Zhang, Yifan Xu, Xuanyu Lei, Hanyu Lai, Yu Gu, Hangliang Ding, Kaiwen Men, Kejuan Yang, 2023. Agentbench: Evaluating llms as agents. arXiv preprint arXiv:2308.03688 (2023)."},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE48619.2023.00119"},{"key":"e_1_3_2_2_49_1","volume-title":"Chatting with GPT-3 for Zero-Shot Human-Like Mobile Automated GUI Testing. arXiv preprint arXiv:2305.09434","author":"Liu Zhe","year":"2023","unstructured":"Zhe Liu, Chunyang Chen, Junjie Wang, Mengzhuo Chen, Boyu Wu, Xing Che, Dandan Wang, and Qing Wang. 2023. Chatting with GPT-3 for Zero-Shot Human-Like Mobile Automated GUI Testing. arXiv preprint arXiv:2305.09434 (2023)."},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3173580"},{"key":"e_1_3_2_2_51_1","unstructured":"OpenAI. 2023. API Reference - OpenAI API. https:\/\/platform.openai.com\/docs\/api-reference\/introduction"},{"key":"e_1_3_2_2_52_1","unstructured":"OpenAI. 2023. GPT-4 Technical Report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_2_53_1","volume-title":"AutoTask: Executing Arbitrary Voice Commands by Exploring and Learning from Mobile GUI. arXiv preprint arXiv:2312.16062","author":"Pan Lihang","year":"2023","unstructured":"Lihang Pan, Bowen Wang, Chun Yu, Yuxuan Chen, Xiangyu Zhang, and Yuanchun Shi. 2023. AutoTask: Executing Arbitrary Voice Commands by Exploring and Learning from Mobile GUI. arXiv preprint arXiv:2312.16062 (2023)."},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3517459"},{"key":"e_1_3_2_2_55_1","doi-asserted-by":"publisher","DOI":"10.3390\/su10020488"},{"key":"e_1_3_2_2_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3586183.3606763"},{"key":"e_1_3_2_2_57_1","volume-title":"True few-shot learning with language models. Advances in neural information processing systems 34","author":"Perez Ethan","year":"2021","unstructured":"Ethan Perez, Douwe Kiela, and Kyunghyun Cho. 2021. True few-shot learning with language models. Advances in neural information processing systems 34 (2021), 11054\u201311070."},{"key":"e_1_3_2_2_58_1","volume-title":"Md Nafizur, The AI Race is on","author":"Rahaman Md\u00a0Saidur","year":"2023","unstructured":"Md\u00a0Saidur Rahaman, MM Ahsan, Nishath Anjum, Md\u00a0Mizanur Rahman, and Md\u00a0Nafizur Rahman. 2023. The AI race is on! Google\u2019s Bard and OpenAI\u2019s ChatGPT head to head: an opinion article. Mizanur and Rahman, Md Nafizur, The AI Race is on (2023)."},{"key":"e_1_3_2_2_59_1","volume-title":"International Conference on Machine Learning. PMLR, 8821\u20138831","author":"Ramesh Aditya","year":"2021","unstructured":"Aditya Ramesh, Mikhail Pavlov, Gabriel Goh, Scott Gray, Chelsea Voss, Alec Radford, Mark Chen, and Ilya Sutskever. 2021. Zero-shot text-to-image generation. In International Conference on Machine Learning. PMLR, 8821\u20138831."},{"key":"e_1_3_2_2_60_1","volume-title":"AndroidInTheWild: A Large-Scale Dataset For Android Device Control. Advances in Neural Information Processing Systems 36","author":"Rawles Christopher","year":"2024","unstructured":"Christopher Rawles, Alice Li, Daniel Rodriguez, Oriana Riva, and Timothy Lillicrap. 2024. AndroidInTheWild: A Large-Scale Dataset For Android Device Control. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_2_61_1","doi-asserted-by":"publisher","DOI":"10.1002\/pri.66"},{"key":"e_1_3_2_2_62_1","volume-title":"International Conference on Machine Learning. PMLR, 3135\u20133144","author":"Shi Tianlin","year":"2017","unstructured":"Tianlin Shi, Andrej Karpathy, Linxi Fan, Jonathan Hernandez, and Percy Liang. 2017. World of bits: An open-domain platform for web-based agents. In International Conference on Machine Learning. PMLR, 3135\u20133144."},{"key":"e_1_3_2_2_63_1","doi-asserted-by":"publisher","DOI":"10.23919\/FUSION49751.2022.9841287"},{"key":"e_1_3_2_2_64_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161317"},{"key":"e_1_3_2_2_65_1","volume-title":"Does fine-tuning GPT-3 with the OpenAI API leak personally-identifiable information?arXiv preprint arXiv:2307.16382","author":"Sun Albert\u00a0Yu","year":"2023","unstructured":"Albert\u00a0Yu Sun, Eliott Zemour, Arushi Saxena, Udith Vaidyanathan, Eric Lin, Christian Lau, and Vaikkunth Mugunthan. 2023. Does fine-tuning GPT-3 with the OpenAI API leak personally-identifiable information?arXiv preprint arXiv:2307.16382 (2023)."},{"key":"e_1_3_2_2_66_1","volume-title":"Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Louis Martin, Kevin Stone, Peter Albert, Amjad Almahairi, Yasmine Babaei, Nikolay Bashlykov, Soumya Batra, Prajjwal Bhargava, Shruti Bhosale, 2023. Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)."},{"key":"e_1_3_2_2_67_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581998"},{"key":"e_1_3_2_2_68_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3580895"},{"key":"e_1_3_2_2_69_1","volume-title":"LLM4Vis: Explainable Visualization Recommendation using ChatGPT. arXiv preprint arXiv:2310.07652","author":"Wang Lei","year":"2023","unstructured":"Lei Wang, Songheng Zhang, Yun Wang, Ee-Peng Lim, and Yong Wang. 2023. LLM4Vis: Explainable Visualization Recommendation using ChatGPT. arXiv preprint arXiv:2310.07652 (2023)."},{"key":"e_1_3_2_2_70_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/P15-1129"},{"key":"e_1_3_2_2_71_1","volume-title":"Chi, Quoc Le, and Denny Zhou","author":"Wei Jason","year":"2022","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Ed Chi, Quoc Le, and Denny Zhou. 2022. Chain of thought prompting elicits reasoning in large language models. arXiv preprint arXiv:2201.11903 (2022)."},{"key":"e_1_3_2_2_72_1","volume-title":"Shiqi Jiang, Yunhao Liu, Yaqin Zhang, and Yunxin Liu.","author":"Wen Hao","year":"2023","unstructured":"Hao Wen, Yuanchun Li, Guohong Liu, Shanhui Zhao, Tao Yu, Toby Jia-Jun Li, Shiqi Jiang, Yunhao Liu, Yaqin Zhang, and Yunxin Liu. 2023. Empowering LLM to use Smartphone for Intelligent Task Automation. arXiv preprint arXiv:2308.15272 (2023)."},{"key":"e_1_3_2_2_73_1","volume-title":"Bloomberggpt: A large language model for finance. arXiv preprint arXiv:2303.17564","author":"Wu Shijie","year":"2023","unstructured":"Shijie Wu, Ozan Irsoy, Steven Lu, Vadim Dabravolski, Mark Dredze, Sebastian Gehrmann, Prabhanjan Kambadur, David Rosenberg, and Gideon Mann. 2023. Bloomberggpt: A large language model for finance. arXiv preprint arXiv:2303.17564 (2023)."},{"key":"e_1_3_2_2_74_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.2008.929381"},{"key":"e_1_3_2_2_75_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.31"},{"key":"e_1_3_2_2_76_1","unstructured":"Kannon Yamada. 2020. How to control your Android device entirely with your voice. https:\/\/www.makeuseof.com\/tag\/control-android-device-entirely-voice\/"},{"key":"e_1_3_2_2_77_1","volume-title":"Gpt-4v in wonderland: Large multimodal models for zero-shot smartphone gui navigation. arXiv preprint arXiv:2311.07562","author":"Yan An","year":"2023","unstructured":"An Yan, Zhengyuan Yang, Wanrong Zhu, Kevin Lin, Linjie Li, Jianfeng Wang, Jianwei Yang, Yiwu Zhong, Julian McAuley, Jianfeng Gao, 2023. Gpt-4v in wonderland: Large multimodal models for zero-shot smartphone gui navigation. arXiv preprint arXiv:2311.07562 (2023)."},{"key":"e_1_3_2_2_78_1","doi-asserted-by":"publisher","DOI":"10.1145\/3379337.3415841"},{"key":"e_1_3_2_2_79_1","doi-asserted-by":"publisher","DOI":"10.1145\/3699765"},{"key":"e_1_3_2_2_80_1","volume-title":"JustSpeak. Proceedings of the 11th Web for All Conference on - W4A 14","author":"Zhong Yu","year":"2014","unstructured":"Yu Zhong, T.\u00a0V. Raman, Casey Burkhardt, Fadi Biadsy, and Jeffrey\u00a0P. Bigham. 2014. JustSpeak. Proceedings of the 11th Web for All Conference on - W4A 14 (2014)."},{"key":"e_1_3_2_2_81_1","volume-title":"Least-to-most prompting enables complex reasoning in large language models. arXiv preprint arXiv:2205.10625","author":"Zhou Denny","year":"2022","unstructured":"Denny Zhou, Nathanael Sch\u00e4rli, Le Hou, Jason Wei, Nathan Scales, Xuezhi Wang, Dale Schuurmans, Claire Cui, Olivier Bousquet, Quoc Le, 2022. Least-to-most prompting enables complex reasoning in large language models. arXiv preprint arXiv:2205.10625 (2022)."}],"event":{"name":"UIST '24: The 37th Annual ACM Symposium on User Interface Software and Technology","location":"Pittsburgh PA USA","acronym":"UIST '24"},"container-title":["Proceedings of the 37th Annual ACM Symposium on User Interface Software and Technology"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3654777.3676356","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3654777.3676356","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,4]],"date-time":"2025-08-04T21:08:49Z","timestamp":1754341729000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3654777.3676356"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,11]]},"references-count":81,"alternative-id":["10.1145\/3654777.3676356","10.1145\/3654777"],"URL":"https:\/\/doi.org\/10.1145\/3654777.3676356","relation":{},"subject":[],"published":{"date-parts":[[2024,10,11]]},"assertion":[{"value":"2024-10-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}