{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,4]],"date-time":"2026-07-04T16:20:12Z","timestamp":1783182012779,"version":"3.54.6"},"publisher-location":"New York, NY, USA","reference-count":41,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,4,25]],"date-time":"2025-04-25T00:00:00Z","timestamp":1745539200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,4,26]]},"DOI":"10.1145\/3706599.3719729","type":"proceedings-article","created":{"date-parts":[[2025,4,23]],"date-time":"2025-04-23T20:44:19Z","timestamp":1745441059000},"page":"1-12","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":22,"title":["UXAgent: An LLM Agent-Based Usability Testing Framework for Web Design"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8520-0540","authenticated-orcid":false,"given":"Yuxuan","family":"Lu","sequence":"first","affiliation":[{"name":"Northeastern University, Boston, Massachusetts, USA and Amazon, Palo Alto, California, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-8329-4610","authenticated-orcid":false,"given":"Bingsheng","family":"Yao","sequence":"additional","affiliation":[{"name":"Northeastern University, Boston, Massachusetts, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1426-3210","authenticated-orcid":false,"given":"Hansu","family":"Gu","sequence":"additional","affiliation":[{"name":"Amazon, Seattle, Washington, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8769-9130","authenticated-orcid":false,"given":"Jing","family":"Huang","sequence":"additional","affiliation":[{"name":"Amazon, Palo Alto, California, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-0959-8323","authenticated-orcid":false,"given":"Zheshen Jessie","family":"Wang","sequence":"additional","affiliation":[{"name":"Amazon, Seattle, Washington, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-4269-2904","authenticated-orcid":false,"given":"Yang","family":"Li","sequence":"additional","affiliation":[{"name":"Amazon, Palo Alto, California, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1188-2921","authenticated-orcid":false,"given":"Jiri","family":"Gesi","sequence":"additional","affiliation":[{"name":"Amazon, Palo Alto, California, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5257-6843","authenticated-orcid":false,"given":"Qi","family":"He","sequence":"additional","affiliation":[{"name":"Amazon, Palo Alto, California, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7902-7625","authenticated-orcid":false,"given":"Toby Jia-Jun","family":"Li","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, University of Notre Dame, Notre Dame, Indiana, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9371-9441","authenticated-orcid":false,"given":"Dakuo","family":"Wang","sequence":"additional","affiliation":[{"name":"Northeastern University, Boston, Massachusetts, USA and Amazon, Palo Alto, California, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,4,25]]},"reference":[{"key":"e_1_3_3_2_2_2","volume-title":"Usability Testing Essentials: Ready, Set... Test!","author":"Barnum Carol\u00a0M","year":"2020","unstructured":"Carol\u00a0M Barnum. 2020. Usability Testing Essentials: Ready, Set... Test!Morgan Kaufmann."},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"publisher","unstructured":"J.M.\u00a0Christian Bastien. 2010. Usability Testing: A Review of Some Methodological and Technical Aspects of the Method. International Journal of Medical Informatics 79 4 (April 2010) e18\u2013e23. 10.1016\/j.ijmedinf.2008.12.004","DOI":"10.1016\/j.ijmedinf.2008.12.004"},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642363"},{"key":"e_1_3_3_2_5_2","unstructured":"Chaoran Chen Bingsheng Yao Ruishi Zou Wenyue Hua Weimin Lyu Toby Jia-Jun Li and Dakuo Wang. 2025. Towards a Design Guideline for RPA Evaluation: A Survey of Large Language Model-Based Role-Playing Agents. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2502.13012 (2025)."},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"crossref","unstructured":"Ziyan Cui Ning Li and Huaikang Zhou. 2024. Can AI Replace Human Subjects? A Large-Scale Replication of Psychological Experiments with LLMs. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.00128 (2024).","DOI":"10.2139\/ssrn.4940173"},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"publisher","unstructured":"Heidi Decker-Maurer. 2012. Method Madness: A Usability Testing Pilot Research Case Study. Commun. Des. Q. Rev 13 1 (March 2012) 14\u201318. 10.1145\/2424837.2424839","DOI":"10.1145\/2424837.2424839"},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"publisher","DOI":"10.1109\/ACSOS49614.2020.00038"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICSTW52544.2021.00049"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"publisher","DOI":"10.1145\/2207676.2208365"},{"key":"e_1_3_3_2_11_2","volume-title":"The Twelfth International Conference on Learning Representations","author":"Gur Izzeddin","year":"2023","unstructured":"Izzeddin Gur, Hiroki Furuta, Austin\u00a0V. Huang, Mustafa Safdari, Yutaka Matsuo, Douglas Eck, and Aleksandra Faust. 2023. A Real-World WebAgent with Planning, Long Context Understanding, and Program Synthesis. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=9JQtrumvg8"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","unstructured":"Onder Gurcan. 2024. LLM-Augmented Agent-Based Modelling for Social Simulations: Challenges and Opportunities. 10.48550\/arXiv.2405.06700 arxiv:https:\/\/arXiv.org\/abs\/2405.06700\u00a0[physics]","DOI":"10.48550\/arXiv.2405.06700"},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"publisher","unstructured":"Morten Hertzum and Niels\u00a0Ebbe Jacobsen. 2003. The Evaluator Effect: A Chilling Fact About Usability Evaluation Methods. International Journal of Human-Computer Interaction (Feb. 2003). 10.1207\/S15327590IJHC1501_14","DOI":"10.1207\/S15327590IJHC1501_14"},{"key":"e_1_3_3_2_14_2","unstructured":"Daniel Kahneman. 2011. Thinking fast and slow. Farrar Straus and Giroux (2011)."},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3581247"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3517647"},{"key":"e_1_3_3_2_17_2","unstructured":"Kyuwon Lee Simone Paci Jeongmin Park Hye\u00a0Young You and Sylvan Zheng. [n. d.]. Applications of GPT in Political Science Research. ([n. d.])."},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"publisher","unstructured":"James\u00a0R Lewis. 2012. Usability Testing. Handbook of human factors and ergonomics (2012) 1267\u20131312. 10.1002\/9781118131350.ch46","DOI":"10.1002\/9781118131350.ch46"},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"publisher","unstructured":"Junkai Li Siyu Wang Meng Zhang Weitao Li Yunghwei Lai Xinhui Kang Weizhi Ma and Yang Liu. 2024. Agent Hospital: A Simulacrum of Hospital with Evolvable Medical Agents. 10.48550\/arXiv.2405.02957 arxiv:https:\/\/arXiv.org\/abs\/2405.02957\u00a0[cs]","DOI":"10.48550\/arXiv.2405.02957"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","unstructured":"Tianshi Li Sauvik Das Hao-Ping Lee Dakuo Wang Bingsheng Yao and Zhiping Zhang. 2024. Human-Centered Privacy Research in the Age of Large Language Models. 10.48550\/arXiv.2402.01994 arxiv:https:\/\/arXiv.org\/abs\/2402.01994\u00a0[cs]","DOI":"10.48550\/arXiv.2402.01994"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","unstructured":"Kaixin Ma Hongming Zhang Hongwei Wang Xiaoman Pan Wenhao Yu and Dong Yu. 2024. LASER: LLM Agent with State-Space Exploration for Web Navigation. 10.48550\/arXiv.2309.08172 arxiv:https:\/\/arXiv.org\/abs\/2309.08172\u00a0[cs]","DOI":"10.48550\/arXiv.2309.08172"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"publisher","unstructured":"Reiichiro Nakano Jacob Hilton Suchir Balaji Jeff Wu Long Ouyang Christina Kim Christopher Hesse Shantanu Jain Vineet Kosaraju William Saunders Xu Jiang Karl Cobbe Tyna Eloundou Gretchen Krueger Kevin Button Matthew Knight Benjamin Chess and John Schulman. 2022. WebGPT: Browser-assisted Question-Answering with Human Feedback. 10.48550\/arXiv.2112.09332 arxiv:https:\/\/arXiv.org\/abs\/2112.09332\u00a0[cs]","DOI":"10.48550\/arXiv.2112.09332"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"publisher","DOI":"10.1145\/1142405.1142439"},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"publisher","DOI":"10.1145\/3586183.3606763"},{"key":"e_1_3_3_2_25_2","unstructured":"Joon\u00a0Sung Park Carolyn\u00a0Q. Zou Aaron Shaw Benjamin\u00a0Mako Hill Carrie Cai Meredith\u00a0Ringel Morris Robb Willer Percy Liang and Michael\u00a0S. Bernstein. 2024. Generative Agent Simulations of 1 000 People. arxiv:https:\/\/arXiv.org\/abs\/2411.10109http:\/\/arxiv.org\/abs\/2411.10109"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"publisher","DOI":"10.1145\/3539597.3575792"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"publisher","unstructured":"Chen Qian Wei Liu Hongzhang Liu Nuo Chen Yufan Dang Jiahao Li Cheng Yang Weize Chen Yusheng Su Xin Cong Juyuan Xu Dahai Li Zhiyuan Liu and Maosong Sun. 2024. ChatDev: Communicative Agents for Software Development. 10.48550\/arXiv.2307.07924 arxiv:https:\/\/arXiv.org\/abs\/2307.07924\u00a0[cs]","DOI":"10.48550\/arXiv.2307.07924"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4939-0378-8_16"},{"key":"e_1_3_3_2_29_2","unstructured":"Samuel Schmidgall Rojin Ziaei Carl Harris Eduardo Reis Jeffrey Jopling and Michael Moor. 2024. AgentClinic: A Multimodal Agent Benchmark to Evaluate AI in Simulated Clinical Environments. arxiv:https:\/\/arXiv.org\/abs\/2405.07960\u00a0[cs] http:\/\/arxiv.org\/abs\/2405.07960"},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"publisher","unstructured":"Albrecht Schmidt Passant Elagroudy Fiona Draxler Frauke Kreuter and Robin Welsch. 2024. Simulating the Human in HCD with ChatGPT: Redesigning Interaction Design with AI. interactions 31 1 (Jan. 2024) 24\u201331. 10.1145\/3637436","DOI":"10.1145\/3637436"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"publisher","unstructured":"Yijia Shao Tianshi Li Weiyan Shi Yanchen Liu and Diyi Yang. 2024. PrivacyLens: Evaluating Privacy Norm Awareness of Language Models in Action. 10.48550\/arXiv.2409.00138 arxiv:https:\/\/arXiv.org\/abs\/2409.00138\u00a0[cs]","DOI":"10.48550\/arXiv.2409.00138"},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"publisher","unstructured":"Debora Shaw. 1996. Handbook of Usability Testing: How to Plan Design and Conduct Effective Tests. Journal of the American Society for Information Science 47 3 (March 1996) 258\u2013259. 10.1002\/(SICI)1097-4571(199603)47:3$<$258::AID-ASI18$>$3.0.CO;2-#","DOI":"10.1002\/(SICI)1097-4571(199603)47:3$<$258::AID-ASI18$>$3.0.CO;2-#"},{"key":"e_1_3_3_2_33_2","doi-asserted-by":"publisher","DOI":"10.1145\/3290607.3313039"},{"key":"e_1_3_3_2_34_2","volume-title":"The Thirteenth International Conference on Learning Representations","author":"Su Hongjin","year":"2024","unstructured":"Hongjin Su, Ruoxi Sun, Jinsung Yoon, Pengcheng Yin, Tao Yu, and Sercan\u00a0O. Arik. 2024. Learn-by-Interact: A Data-Centric Framework For Self-Adaptive Agents in Realistic Environments. In The Thirteenth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=3UKOzGWCVY"},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642777"},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"publisher","unstructured":"David\u00a0R. Thomas. 2006. A General Inductive Approach for Analyzing Qualitative Evaluation Data. American Journal of Evaluation 27 2 (2006) 237\u2013246. 10.1177\/1098214005283748 arXiv:10.1177\/1098214005283748","DOI":"10.1177\/1098214005283748"},{"key":"e_1_3_3_2_37_2","doi-asserted-by":"publisher","unstructured":"Dakuo Wang Justin\u00a0D. Weisz Michael Muller Parikshit Ram Werner Geyer Casey Dugan Yla Tausczik Horst Samulowitz and Alexander Gray. 2019. Human-AI Collaboration in Data Science: Exploring Data Scientists\u2019 Perceptions of Automated AI. Proceedings of the ACM on Human-Computer Interaction 3 CSCW (Nov. 2019) 1\u201324. 10.1145\/3359313 arxiv:https:\/\/arXiv.org\/abs\/1909.02309\u00a0[cs]","DOI":"10.1145\/3359313"},{"key":"e_1_3_3_2_38_2","doi-asserted-by":"publisher","unstructured":"Lei Wang Chen Ma Xueyang Feng Zeyu Zhang Hao Yang Jingsen Zhang Zhiyuan Chen Jiakai Tang Xu Chen Yankai Lin Wayne\u00a0Xin Zhao Zhewei Wei and Ji-Rong Wen. 2024. A Survey on Large Language Model Based Autonomous Agents. Frontiers of Computer Science 18 6 (Dec. 2024) 186345. 10.1007\/s11704-024-40231-1 arxiv:https:\/\/arXiv.org\/abs\/2308.11432\u00a0[cs]","DOI":"10.1007\/s11704-024-40231-1"},{"key":"e_1_3_3_2_39_2","doi-asserted-by":"publisher","unstructured":"Ke Yang Yao Liu Sapana Chaudhary Rasool Fakoor Pratik Chaudhari George Karypis and Huzefa Rangwala. 2024. AgentOccam: A Simple Yet Strong Baseline for LLM-Based Web Agents. 10.48550\/arXiv.2410.13825 arxiv:https:\/\/arXiv.org\/abs\/2410.13825\u00a0[cs]","DOI":"10.48550\/arXiv.2410.13825"},{"key":"e_1_3_3_2_40_2","volume-title":"Advances in Neural Information Processing Systems","author":"Yao Shunyu","year":"2022","unstructured":"Shunyu Yao, Howard Chen, John Yang, and Karthik\u00a0R. Narasimhan. 2022. WebShop: Towards Scalable Real-World Web Interaction with Grounded Language Agents. In Advances in Neural Information Processing Systems. https:\/\/openreview.net\/forum?id=R9KnuFlvnU"},{"key":"e_1_3_3_2_41_2","doi-asserted-by":"publisher","unstructured":"Zhiping Zhang Bingcan Guo and Tianshi Li. 2025. Privacy Leakage Overshadowed by Views of AI: A Study on Human Oversight of Privacy in Language Model Agent. 10.48550\/arXiv.2411.01344 arxiv:https:\/\/arXiv.org\/abs\/2411.01344\u00a0[cs]","DOI":"10.48550\/arXiv.2411.01344"},{"key":"e_1_3_3_2_42_2","unstructured":"Shuyan Zhou Frank\u00a0F. Xu Hao Zhu Xuhui Zhou Robert Lo Abishek Sridhar Xianyi Cheng Tianyue Ou Yonatan Bisk Daniel Fried Uri Alon and Graham Neubig. 2024. WebArena: A Realistic Web Environment for Building Autonomous Agents. arxiv:https:\/\/arXiv.org\/abs\/2307.13854\u00a0[cs] http:\/\/arxiv.org\/abs\/2307.13854"}],"event":{"name":"CHI EA '25: Extended Abstracts of the CHI Conference on Human Factors in Computing Systems","location":"Yokohama Japan","acronym":"CHI EA '25","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the Extended Abstracts of the CHI Conference on Human Factors in Computing Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3706599.3719729","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3706599.3719729","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:43Z","timestamp":1750295923000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3706599.3719729"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,25]]},"references-count":41,"alternative-id":["10.1145\/3706599.3719729","10.1145\/3706599"],"URL":"https:\/\/doi.org\/10.1145\/3706599.3719729","relation":{},"subject":[],"published":{"date-parts":[[2025,4,25]]},"assertion":[{"value":"2025-04-25","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}