{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T03:47:42Z","timestamp":1776052062690,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":101,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,4,13]]},"DOI":"10.1145\/3772363.3778737","type":"proceedings-article","created":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T01:55:24Z","timestamp":1776045324000},"page":"1-8","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Human-AI-UI Interactions Across Modalities"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-3835-0127","authenticated-orcid":false,"given":"Kewen","family":"Peng","sequence":"first","affiliation":[{"name":"Kahlert School of Computing, University of Utah, Salt Lake City, Utah, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6880-8546","authenticated-orcid":false,"given":"Jeffrey","family":"Nichols","sequence":"additional","affiliation":[{"name":"Apple, Seattle, Washington, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0634-7569","authenticated-orcid":false,"given":"Christof","family":"Lutteroth","sequence":"additional","affiliation":[{"name":"Computer Science, University of Bath, Bath, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4928-6225","authenticated-orcid":false,"given":"Tiffany","family":"Knearem","sequence":"additional","affiliation":[{"name":"MBZUAI, Abu Dhabi, United Arab Emirates"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8115-7592","authenticated-orcid":false,"given":"Felix","family":"Kretzer","sequence":"additional","affiliation":[{"name":"human-centered systems lab (h-lab), Karlsruhe Institute of Technology (KIT), Karlsruhe, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2072-0625","authenticated-orcid":false,"given":"Jeffrey P","family":"Bigham","sequence":"additional","affiliation":[{"name":"Human-Computer Interaction Institute, Carnegie Mellon University, Pittsburgh, Pennsylvania, USA and Apple, Pittsburgh, Pennsylvania, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6546-4816","authenticated-orcid":false,"given":"Alexander","family":"Maedche","sequence":"additional","affiliation":[{"name":"human-centered systems lab (h-lab), Karlsruhe Institute of Technology (KIT), Karlsruhe, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0022-6512","authenticated-orcid":false,"given":"Yue","family":"Jiang","sequence":"additional","affiliation":[{"name":"Kahlert School of Computing, University of Utah, Salt Lake City, Utah, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2026,4,13]]},"reference":[{"key":"e_1_3_3_1_2_2","unstructured":"Chongyang Bai Xiaoxue Zang Ying Xu Srinivas Sunkara Abhinav Rastogi Jindong Chen et\u00a0al. 2021. Uibert: Learning generic multimodal representations for ui understanding. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2107.13731 (2021)."},{"key":"e_1_3_3_1_3_2","volume-title":"Sketching user experiences: getting the design right and the right design","author":"Buxton Bill","year":"2010","unstructured":"Bill Buxton. 2010. Sketching user experiences: getting the design right and the right design. Morgan kaufmann."},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1145\/3472749.3474750"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642394"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.1109\/VR51125.2022.00063"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.1145\/3448018.3458008"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"publisher","DOI":"10.1145\/3126594.3126651"},{"key":"e_1_3_3_1_9_2","series-title":"(NIPS \u201923)","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems","author":"Deng Xiang","year":"2023","unstructured":"Xiang Deng, Yu Gu, Boyuan Zheng, Shijie Chen, Samuel Stevens, Boshi Wang, Huan Sun, and Yu Su. 2023. MIND2WEB: towards a generalist agent for the web. In Proceedings of the 37th International Conference on Neural Information Processing Systems (New Orleans, LA, USA) (NIPS \u201923). Curran Associates Inc., Red Hook, NY, USA."},{"key":"e_1_3_3_1_10_2","unstructured":"Xiang Deng Yu Gu Boyuan Zheng Shijie Chen Sam Stevens Boshi Wang Huan Sun and Yu Su. 2024. Mind2web: Towards a generalist agent for the web. NeurIPS (2024)."},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"crossref","unstructured":"Zhi-Chao Dong Wenming Wu Zenghao Xu Qi Sun Guanjie Yuan Ligang Liu and Xiao-Ming Fu. 2021. Tailored reality: Perception-aware scene restructuring for adaptive vr navigation. ACM Transactions on Graphics (TOG) 40 5 (2021) 1\u201315.","DOI":"10.1145\/3470847"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"crossref","unstructured":"Steven\u00a0P Dow Alana Glassco Jonathan Kass Melissa Schwarz Daniel\u00a0L Schwartz and Scott\u00a0R Klemmer. 2010. Parallel prototyping leads to better design results more divergence and increased self-efficacy. ACM Transactions on Computer-Human Interaction (TOCHI) 17 4 (2010) 1\u201324.","DOI":"10.1145\/1879831.1879836"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.1145\/3379337.3415881"},{"key":"e_1_3_3_1_14_2","unstructured":"Peitong Duan Chin-yi Chen Gang Li Bjoern Hartmann and Yang Li. 2024. UICrit: Enhancing Automated Design Evaluation with a UICritique Dataset. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2407.08850 (2024)."},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"publisher","DOI":"10.1145\/2788940.2788954"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3173843"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"publisher","DOI":"10.1145\/3126594.3126621"},{"key":"e_1_3_3_1_18_2","unstructured":"Sidong Feng Mingyue Yuan Jieshan Chen Zhenchang Xing and Chunyang Chen. 2023. Designing with Language: Wireframing UI Design Intent with Generative Large Language Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2312.07755 (2023)."},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.1145\/964442.964461"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISMAR.2014.6948429"},{"key":"e_1_3_3_1_21_2","unstructured":"Boyu Gou Ruohan Wang Boyuan Zheng Yanan Xie Cheng Chang Yiheng Shu Huan Sun and Yu Su. 2024. Navigating the digital world as humans do: Universal visual grounding for gui agents. arXiv:https:\/\/arXiv.org\/abs\/2410.05243 (2024)."},{"key":"e_1_3_3_1_22_2","unstructured":"Boyu Gou Ruohan Wang Boyuan Zheng Yanan Xie Cheng Chang Yiheng Shu Huan Sun and Yu Su. 2024. Navigating the digital world as humans do: Universal visual grounding for gui agents. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.05243 (2024)."},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"crossref","unstructured":"Lei Han Tian Zheng Yinheng Zhu Lan Xu and Lu Fang. 2020. Live semantic 3d perception for immersive augmented reality. IEEE transactions on visualization and computer graphics 26 5 (2020) 2012\u20132022.","DOI":"10.1109\/TVCG.2020.2973477"},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"crossref","unstructured":"Violet\u00a0Yinuo Han Hyunsung Cho Kiyosu Maeda Alexandra Ion and David Lindlbauer. 2023. Blendmr: A computational method to create ambient mixed reality interfaces. Proceedings of the ACM on Human-Computer Interaction 7 ISS (2023) 217\u2013241.","DOI":"10.1145\/3626472"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.1145\/3544549.3583960"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"crossref","unstructured":"Dominik Herr Jan Reinhardt Guido Reina Robert Kr\u00fcger Rafael\u00a0V Ferrari and Thomas Ertl. 2018. Immersive modular factory layout planning using augmented reality. Procedia CIRP 72 (2018) 1112\u20131117.","DOI":"10.1016\/j.procir.2018.03.200"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300334"},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"crossref","unstructured":"Tian Huang Chun Yu Weinan Shi Zijian Peng David Yang Weiqi Sun and Yuanchun Shi. 2025. Prompt2task: Automating ui tasks on smartphones from textual prompts. ACM Transactions on Computer-Human Interaction (2025).","DOI":"10.1145\/3716132"},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"crossref","unstructured":"Xiaowei Huang Wenjie Ruan Wei Huang Gaojie Jin Yi Dong Changshun Wu Saddek Bensalem Ronghui Mu Yi Qi Xingyu Zhao et\u00a0al. 2024. A survey of safety and trustworthiness of large language models through the lens of verification and validation. Artificial Intelligence Review 57 7 (2024) 175.","DOI":"10.1007\/s10462-024-10824-0"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3173793"},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.naacl-demo.17"},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613905.3638191"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1145\/3654777.3676436"},{"key":"e_1_3_3_1_34_2","unstructured":"Yue Jiang Luis\u00a0A Leiva Paul\u00a0RB Houssel Hamed\u00a0R Tavakoli Julia Kylm\u00e4l\u00e4 and Antti Oulasvirta. 2024. UEyes: an eye-tracking dataset across user interface types. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.05202 (2024)."},{"key":"e_1_3_3_1_35_2","volume-title":"Workshop Paper at the 2023 CHI Conference on Human Factors in Computing Systems","author":"Jiang Yue","year":"2023","unstructured":"Yue Jiang, Luis\u00a0A Leiva, Hamed Rezazadegan\u00a0Tavakoli, Paul RB\u00a0Houssel, Julia Kylm\u00e4l\u00e4, and Antti Oulasvirta. 2023. UEyes: An Eye-Tracking Dataset across User Interface Types. In Workshop Paper at the 2023 CHI Conference on Human Factors in Computing Systems."},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3581096"},{"key":"e_1_3_3_1_37_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613905.3636316"},{"key":"e_1_3_3_1_38_2","doi-asserted-by":"publisher","DOI":"10.1145\/3544549.3573805"},{"key":"e_1_3_3_1_39_2","doi-asserted-by":"publisher","DOI":"10.1145\/3491101.3504030"},{"key":"e_1_3_3_1_40_2","unstructured":"Yue Jiang Eldon Schoop Amanda Swearngin and Jeffrey Nichols. 2023. ILuvUI: Instruction-tuned LangUage-Vision modeling of UIs from Machine Conversations. arxiv:https:\/\/arXiv.org\/abs\/2310.04869\u00a0[cs.HC]"},{"key":"e_1_3_3_1_41_2","doi-asserted-by":"crossref","unstructured":"Dongsik Jo and Gerard\u00a0Jounghyun Kim. 2016. ARIoT: scalable augmented reality framework for interacting with Internet of Things appliances everywhere. IEEE Transactions on Consumer Electronics 62 3 (2016) 334\u2013340.","DOI":"10.1109\/TCE.2016.7613201"},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"publisher","DOI":"10.5555\/857202.858134"},{"key":"e_1_3_3_1_43_2","doi-asserted-by":"publisher","DOI":"10.1145\/3544549.3573874"},{"key":"e_1_3_3_1_44_2","doi-asserted-by":"publisher","DOI":"10.1109\/RE59067.2024.00045"},{"key":"e_1_3_3_1_45_2","unstructured":"Kristian Kolthoff Felix Kretzer Christian Bartelt Alexander Maedche and Simone\u00a0Paolo Ponzetto. 2025. GUI-ReRank: Enhancing GUI Retrieval with Multi-Modal LLM-based Reranking. arxiv:https:\/\/arXiv.org\/abs\/2508.03298\u00a0[cs.SE] https:\/\/arxiv.org\/abs\/2508.03298"},{"key":"e_1_3_3_1_46_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE-Companion66252.2025.00010"},{"key":"e_1_3_3_1_47_2","doi-asserted-by":"publisher","unstructured":"Felix Kretzer Kristian Kolthoff Christian Bartelt Simone\u00a0Paolo Ponzetto and Alexander Maedche. 2025. Closing the Loop between User Stories and GUI Prototypes: An LLM-Based Assistant for Cross-Functional Integration in Software Development(CHI \u201925). Association for Computing Machinery New York NY USA Article 879 19\u00a0pages. 10.1145\/3706598.3713932","DOI":"10.1145\/3706598.3713932"},{"key":"e_1_3_3_1_48_2","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300406"},{"key":"e_1_3_3_1_49_2","doi-asserted-by":"publisher","DOI":"10.1145\/3301275.3302278"},{"key":"e_1_3_3_1_50_2","doi-asserted-by":"publisher","DOI":"10.1145\/257089.257396"},{"key":"e_1_3_3_1_51_2","doi-asserted-by":"publisher","DOI":"10.1109\/VR.2019.8798018"},{"key":"e_1_3_3_1_52_2","doi-asserted-by":"publisher","DOI":"10.1109\/VRW66409.2025.00059"},{"key":"e_1_3_3_1_53_2","doi-asserted-by":"publisher","DOI":"10.1145\/2037373.2037467"},{"key":"e_1_3_3_1_54_2","unstructured":"Jianan Li Jimei Yang Aaron Hertzmann Jianming Zhang and Tingfa Xu. 2019. Layoutgan: Generating graphic layouts with wireframe discriminators. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1901.06767 (2019)."},{"key":"e_1_3_3_1_55_2","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445049"},{"key":"e_1_3_3_1_56_2","doi-asserted-by":"publisher","DOI":"10.1145\/3654777.3676470"},{"key":"e_1_3_3_1_57_2","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445532"},{"key":"e_1_3_3_1_58_2","doi-asserted-by":"publisher","DOI":"10.1145\/3586182.3616624"},{"key":"e_1_3_3_1_59_2","doi-asserted-by":"publisher","DOI":"10.1145\/3332165.3347945"},{"key":"e_1_3_3_1_60_2","doi-asserted-by":"publisher","DOI":"10.1109\/VR50410.2021.00104"},{"key":"e_1_3_3_1_61_2","doi-asserted-by":"publisher","DOI":"10.1109\/VR46266.2020.00113"},{"key":"e_1_3_3_1_62_2","doi-asserted-by":"publisher","DOI":"10.1145\/3706599.3706736"},{"key":"e_1_3_3_1_63_2","unstructured":"Yuwen Lu Alan Leung Amanda Swearngin Jeffrey Nichols and Titus Barik. 2024. Misty: UI Prototyping Through Interactive Conceptual Blending. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.13900 (2024)."},{"key":"e_1_3_3_1_64_2","unstructured":"Yuwen Lu Ziang Tong Qinyi Zhao Chengzhi Zhang and Toby Jia-Jun Li. 2023. UI Layout Generation with LLMs Guided by UI Grammar. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.15455 (2023)."},{"key":"e_1_3_3_1_65_2","doi-asserted-by":"publisher","DOI":"10.1145\/3491101.3519809"},{"key":"e_1_3_3_1_66_2","doi-asserted-by":"publisher","DOI":"10.1145\/3654777.3676331"},{"key":"e_1_3_3_1_67_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642915"},{"key":"e_1_3_3_1_68_2","doi-asserted-by":"crossref","unstructured":"Brad\u00a0A Myers. 1985. The importance of percent-done progress indicators for computer-human interfaces. ACM SIGCHI Bulletin 16 4 (1985) 11\u201317.","DOI":"10.1145\/1165385.317459"},{"key":"e_1_3_3_1_69_2","unstructured":"Reiichiro Nakano Jacob Hilton Suchir Balaji Jeff Wu Long Ouyang Christina Kim Christopher Hesse Shantanu Jain Vineet Kosaraju William Saunders Xu Jiang Karl Cobbe Tyna Eloundou Gretchen Krueger Kevin Button Matthew Knight Benjamin Chess and John Schulman. 2022. WebGPT: Browser-assisted question-answering with human feedback. arxiv:https:\/\/arXiv.org\/abs\/2112.09332\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2112.09332"},{"key":"e_1_3_3_1_70_2","unstructured":"Dang Nguyen Jian Chen Yu Wang Gang Wu Namyong Park Zhengmian Hu Hanjia Lyu Junda Wu Ryan Aponte Yu Xia et\u00a0al. 2024. Gui agents: A survey. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.13501 (2024)."},{"key":"e_1_3_3_1_71_2","doi-asserted-by":"publisher","DOI":"10.1145\/571985.572008"},{"key":"e_1_3_3_1_72_2","doi-asserted-by":"publisher","DOI":"10.1145\/571985.572008"},{"key":"e_1_3_3_1_73_2","volume-title":"The design of everyday things: Revised and expanded edition","author":"Norman Don","year":"2013","unstructured":"Don Norman. 2013. The design of everyday things: Revised and expanded edition. Basic books."},{"key":"e_1_3_3_1_74_2","doi-asserted-by":"publisher","DOI":"10.1145\/2858036.2858250"},{"key":"e_1_3_3_1_75_2","unstructured":"OpenAI. 2025. Computer-Using Agent: Introducing a universal interface for AI to interact with the digital world. (2025). https:\/\/openai.com\/index\/computer-using-agent"},{"key":"e_1_3_3_1_76_2","unstructured":"OpenAI. 2025. Introducing Operator. (2025). https:\/\/openai.com\/index\/introducing-operator\/"},{"key":"e_1_3_3_1_77_2","unstructured":"Yi-Hao Peng Dingzeyu Li Jeffrey\u00a0P. Bigham and Amy Pavel. 2025. Morae: Proactively Pausing UI Agents for User Choices. arxiv:https:\/\/arXiv.org\/abs\/2508.21456\u00a0[cs.HC] https:\/\/arxiv.org\/abs\/2508.21456"},{"key":"e_1_3_3_1_78_2","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3517665"},{"key":"e_1_3_3_1_79_2","doi-asserted-by":"crossref","unstructured":"Francisco\u00a0J Romero-Ramirez Rafael Mu\u00f1oz-Salinas and Rafael Medina-Carnicer. 2018. Speeded up detection of squared fiducial markers. Image and vision Computing 76 (2018) 38\u201347.","DOI":"10.1016\/j.imavis.2018.05.004"},{"key":"e_1_3_3_1_80_2","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3517497"},{"key":"e_1_3_3_1_81_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISMAR.2008.4637354"},{"key":"e_1_3_3_1_82_2","doi-asserted-by":"crossref","unstructured":"Constantine Stephanidis Gavriel Salvendy Margherita Antona Jessie\u00a0YC Chen Jianming Dong Vincent\u00a0G Duffy Xiaowen Fang Cali Fidopiastis Gino Fragomeni Limin\u00a0Paul Fu et\u00a0al. 2019. Seven HCI grand challenges. International Journal of Human\u2013Computer Interaction 35 14 (2019) 1229\u20131269.","DOI":"10.1080\/10447318.2019.1619259"},{"key":"e_1_3_3_1_83_2","doi-asserted-by":"crossref","unstructured":"Maryam Taeb Amanda Swearngin Eldon Schoop Ruijia Cheng Yue Jiang and Jeffrey Nichols. 2023. AXNav: Replaying Accessibility Tests from Natural Language. arxiv:https:\/\/arXiv.org\/abs\/2310.02424\u00a0[cs.HC]","DOI":"10.1145\/3613904.3642777"},{"key":"e_1_3_3_1_84_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISMAR-Adjunct51615.2020.00072"},{"key":"e_1_3_3_1_85_2","doi-asserted-by":"publisher","DOI":"10.1145\/3654777.3676446"},{"key":"e_1_3_3_1_86_2","unstructured":"Laura Weidinger Maribeth Rauh Nahema Marchal Arianna Manzini Lisa\u00a0Anne Hendricks Juan Mateos-Garcia Stevie Bergman Jackie Kay Conor Griffin Ben Bariach et\u00a0al. 2023. Sociotechnical safety evaluation of generative ai systems. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.11986 (2023)."},{"key":"e_1_3_3_1_87_2","unstructured":"Fangzhou Wu Shutong Wu Yulong Cao and Chaowei Xiao. 2024. WIPI: A New Web Threat for LLM-Driven Web Agents. arxiv:https:\/\/arXiv.org\/abs\/2402.16965\u00a0[cs.CR] https:\/\/arxiv.org\/abs\/2402.16965"},{"key":"e_1_3_3_1_88_2","unstructured":"Jason Wu Yi-Hao Peng Amanda Li Amanda Swearngin Jeffrey\u00a0P Bigham and Jeffrey Nichols. 2024. UIClip: A Data-driven Model for Assessing User Interface Design. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.12500 (2024)."},{"key":"e_1_3_3_1_89_2","unstructured":"Jason Wu Eldon Schoop Alan Leung Titus Barik Jeffrey\u00a0P Bigham and Jeffrey Nichols. 2024. UICoder: Finetuning Large Language Models to Generate User Interface Code through Automated Feedback. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.07739 (2024)."},{"key":"e_1_3_3_1_90_2","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3581158"},{"key":"e_1_3_3_1_91_2","unstructured":"Tianbao Xie Danyang Zhang Jixuan Chen Xiaochuan Li Siheng Zhao Ruisheng Cao Toh\u00a0Jing Hua Zhoujun Cheng Dongchan Shin Fangyu Lei et\u00a0al. 2024. OSworld: Benchmarking multimodal agents for open-ended tasks in real computer environments. arXiv:https:\/\/arXiv.org\/abs\/2404.07972 (2024)."},{"key":"e_1_3_3_1_92_2","unstructured":"Jianwei Yang Hao Zhang Feng Li Xueyan Zou Chunyuan Li and Jianfeng Gao. 2023. Set-of-Mark Prompting Unleashes Extraordinary Visual Grounding in GPT-4V. arxiv:https:\/\/arXiv.org\/abs\/2310.11441\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2310.11441"},{"key":"e_1_3_3_1_93_2","unstructured":"Shunyu Yao Howard Chen John Yang and Karthik Narasimhan. 2023. WebShop: Towards Scalable Real-World Web Interaction with Grounded Language Agents. arxiv:https:\/\/arXiv.org\/abs\/2207.01206\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2207.01206"},{"key":"e_1_3_3_1_94_2","doi-asserted-by":"crossref","unstructured":"Ori Yoran Samuel\u00a0Joseph Amouyal Chaitanya Malaviya Ben Bogin Ofir Press and Jonathan Berant. 2024. Assistantbench: Can web agents solve realistic and time-consuming tasks? arXiv:https:\/\/arXiv.org\/abs\/2407.15711 (2024).","DOI":"10.18653\/v1\/2024.emnlp-main.505"},{"key":"e_1_3_3_1_95_2","doi-asserted-by":"crossref","unstructured":"Keen You Haotian Zhang Eldon Schoop Floris Weers Amanda Swearngin Jeffrey Nichols Yinfei Yang and Zhe Gan. 2024. Ferret-UI: Grounded Mobile UI Understanding with Multimodal LLMs. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.05719 (2024).","DOI":"10.1007\/978-3-031-73039-9_14"},{"key":"e_1_3_3_1_96_2","unstructured":"Manzil Zaheer Kenneth Marino Will Grathwohl John Schultz Wendy Shang Sheila Babayan Arun Ahuja Ishita Dasgupta Christine Kaeser-Chen and Rob Fergus. 2022. Learning to navigate wikipedia by taking random walks. NeurIPS (2022)."},{"key":"e_1_3_3_1_97_2","doi-asserted-by":"publisher","DOI":"10.1145\/3706598.3714154"},{"key":"e_1_3_3_1_98_2","unstructured":"Chaoyun Zhang Shilin He Jiaxu Qian Bowen Li Liqun Li Si Qin Yu Kang Minghua Ma Guyue Liu Qingwei Lin et\u00a0al. 2024. Large language model-brained gui agents: A survey. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2411.18279 (2024)."},{"key":"e_1_3_3_1_99_2","doi-asserted-by":"publisher","DOI":"10.1145\/3708359.3712153"},{"key":"e_1_3_3_1_100_2","doi-asserted-by":"publisher","DOI":"10.5555\/3692070.3694608"},{"key":"e_1_3_3_1_101_2","volume-title":"ICLR","author":"Zhou Shuyan","year":"2024","unstructured":"Shuyan Zhou, Frank\u00a0F Xu, Hao Zhu, Xuhui Zhou, Robert Lo, Abishek Sridhar, Xianyi Cheng, Tianyue Ou, Yonatan Bisk, Daniel Fried, et\u00a0al. 2024. WebArena: A Realistic Web Environment for Building Autonomous Agents. In ICLR."},{"key":"e_1_3_3_1_102_2","unstructured":"Shuyan Zhou Frank\u00a0F. Xu Hao Zhu Xuhui Zhou Robert Lo Abishek Sridhar Xianyi Cheng Tianyue Ou Yonatan Bisk Daniel Fried Uri Alon and Graham Neubig. 2024. WebArena: A Realistic Web Environment for Building Autonomous Agents. arxiv:https:\/\/arXiv.org\/abs\/2307.13854\u00a0[cs.AI] https:\/\/arxiv.org\/abs\/2307.13854"}],"event":{"name":"CHI EA '26: Extended Abstracts of the 2026 CHI Conference on Human Factors in Computing Systems","location":"Barcelona , Spain","acronym":"CHI EA '26","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the Extended Abstracts of the 2026 CHI Conference on Human Factors in Computing Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3772363.3778737","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T03:18:27Z","timestamp":1776050307000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3772363.3778737"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4,13]]},"references-count":101,"alternative-id":["10.1145\/3772363.3778737","10.1145\/3772363"],"URL":"https:\/\/doi.org\/10.1145\/3772363.3778737","relation":{},"subject":[],"published":{"date-parts":[[2026,4,13]]},"assertion":[{"value":"2026-04-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}