{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,17]],"date-time":"2026-04-17T10:56:16Z","timestamp":1776423376798,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":116,"publisher":"ACM","funder":[{"DOI":"10.13039\/100006754","name":"Army Research Laboratory","doi-asserted-by":"publisher","award":["W911NF2120101"],"award-info":[{"award-number":["W911NF2120101"]}],"id":[{"id":"10.13039\/100006754","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Army Research Laboratory","award":["W911NF2320203"],"award-info":[{"award-number":["W911NF2320203"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,4,13]]},"DOI":"10.1145\/3772318.3790655","type":"proceedings-article","created":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T04:12:36Z","timestamp":1776053556000},"page":"1-20","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["When Should Users Check? Modeling Confirmation Frequency in Multi-Step Agentic AI Tasks"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-4816-3938","authenticated-orcid":false,"given":"Jieyu","family":"Zhou","sequence":"first","affiliation":[{"name":"School of interactive computing, Georgia Institute of Technology, Atlanta, Georgia, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5220-8725","authenticated-orcid":false,"given":"Aryan","family":"Roy","sequence":"additional","affiliation":[{"name":"College of Computing, Georgia Institute of Technology, Atlanta, Georgia, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-1233-3772","authenticated-orcid":false,"given":"Sneh","family":"Gupta","sequence":"additional","affiliation":[{"name":"College of Computing, Georgia Institute of Technology, Atlanta, Georgia, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0079-8000","authenticated-orcid":false,"given":"Daniel","family":"Weitekamp","sequence":"additional","affiliation":[{"name":"School of Interactive Computing, Georgia Institute of Technology, Atlanta, Georgia, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3084-5189","authenticated-orcid":false,"given":"Christopher J.","family":"MacLellan","sequence":"additional","affiliation":[{"name":"School of Interactive Computing, Georgia Institute of Technology, Atlanta, Georgia, USA"}]}],"member":"320","published-online":{"date-parts":[[2026,4,13]]},"reference":[{"key":"e_1_3_3_3_2_2","unstructured":"Constructions Aeronautiques Adele Howe Craig Knoblock ISI\u00a0Drew McDermott Ashwin Ram Manuela Veloso Daniel Weld David\u00a0Wilkins Sri Anthony Barrett Dave Christianson et\u00a0al. 1998. Pddl\u2014the planning domain definition language. Technical Report Tech. Rep. (1998)."},{"key":"e_1_3_3_3_3_2","unstructured":"Layla AI. 2024. About me Layla: AI Trip Planner and Travel Sidekick. https:\/\/layla.ai\/about. Accessed: July 2025."},{"key":"e_1_3_3_3_4_2","unstructured":"Pokee AI. 2024. Revolutionize Digital Productivity with AI-Powered Workflow Automation. https:\/\/pokee.ai\/home. Accessed: July 2025."},{"key":"e_1_3_3_3_5_2","doi-asserted-by":"crossref","unstructured":"Lize Alberts Ulrik Lyngs and Max Van\u00a0Kleek. 2024. Computers as bad social actors: Dark patterns and anti-patterns in interfaces that act socially. Proceedings of the ACM on Human-Computer Interaction 8 CSCW1 (2024) 1\u201325.","DOI":"10.1145\/3653693"},{"key":"e_1_3_3_3_6_2","unstructured":"Dario Amodei Chris Olah Jacob Steinhardt Paul Christiano John Schulman and Dan Man\u00e9. 2016. Concrete problems in AI safety. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1606.06565 (2016)."},{"key":"e_1_3_3_3_7_2","unstructured":"Anthropic. 2024. Introducing computer use a new Claude 3.5 Sonnet and Claude 3.5 Haiku. https:\/\/www.anthropic.com\/news\/3-5-models-and-computer-use. Accessed: July 2025."},{"key":"e_1_3_3_3_8_2","doi-asserted-by":"publisher","DOI":"10.1145\/3654777.3676350"},{"key":"e_1_3_3_3_9_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642016"},{"key":"e_1_3_3_3_10_2","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300484"},{"key":"e_1_3_3_3_11_2","doi-asserted-by":"crossref","unstructured":"Rui Assis and Pedro\u00a0Carmona Marques. 2021. A dynamic methodology for setting up inspection time intervals in conditional preventive maintenance. Applied Sciences 11 18 (2021) 8715.","DOI":"10.3390\/app11188715"},{"key":"e_1_3_3_3_12_2","unstructured":"Gagan Bansal Jennifer\u00a0Wortman Vaughan Saleema Amershi Eric Horvitz Adam Fourney Hussein Mozannar Victor Dibia and Daniel\u00a0S Weld. 2024. Challenges in human-agent communication. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.10380 (2024)."},{"key":"e_1_3_3_3_13_2","doi-asserted-by":"crossref","unstructured":"Richard Barlow and Larry Hunter. 1960. Optimum preventive maintenance policies. Operations research 8 1 (1960) 90\u2013100.","DOI":"10.1287\/opre.8.1.90"},{"key":"e_1_3_3_3_14_2","doi-asserted-by":"crossref","unstructured":"Hugh Beyer and Karen Holtzblatt. 1999. Contextual design. interactions 6 1 (1999) 32\u201342.","DOI":"10.1145\/291224.291229"},{"key":"e_1_3_3_3_15_2","unstructured":"Rogerio Bonatti Dan Zhao Francesco Bonacci Dillon Dupont Sara Abdali Yinheng Li Yadong Lu Justin Wagle Kazuhito Koishida Arthur Bucker et\u00a0al. 2024. Windows agent arena: Evaluating multi-modal os agents at scale. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.08264 (2024)."},{"key":"e_1_3_3_3_16_2","doi-asserted-by":"publisher","DOI":"10.1037\/13620-004"},{"key":"e_1_3_3_3_17_2","unstructured":"Matthew Chang Gunjan Chhablani Alexander Clegg Mikael\u00a0Dallaire Cote Ruta Desai Michal Hlavac Vladimir Karashchuk Jacob Krantz Roozbeh Mottaghi Priyam Parashar et\u00a0al. 2024. PARTNR: A Benchmark for Planning and Reasoning in Embodied Multi-agent Tasks. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2411.00081 (2024)."},{"key":"e_1_3_3_3_18_2","doi-asserted-by":"publisher","DOI":"10.1145\/3586183.3606720"},{"key":"e_1_3_3_3_19_2","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376496"},{"key":"e_1_3_3_3_20_2","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3517477"},{"key":"e_1_3_3_3_21_2","doi-asserted-by":"publisher","DOI":"10.1145\/3746059.3747746"},{"key":"e_1_3_3_3_22_2","unstructured":"CrewAI. 2024. What is CrewAI? https:\/\/docs.crewai.com\/en\/introduction. Accessed: July 2025."},{"key":"e_1_3_3_3_23_2","doi-asserted-by":"publisher","DOI":"10.1145\/3654777.3676323"},{"key":"e_1_3_3_3_24_2","doi-asserted-by":"crossref","unstructured":"Bram De\u00a0Jonge and Philip\u00a0A Scarf. 2020. A review on maintenance optimization. European journal of operational research 285 3 (2020) 805\u2013824.","DOI":"10.1016\/j.ejor.2019.09.047"},{"key":"e_1_3_3_3_25_2","doi-asserted-by":"crossref","unstructured":"Rommert Dekker Ralph\u00a0E Wildeman and Frank\u00a0A Van\u00a0der Duyn\u00a0Schouten. 1997. A review of multi-component maintenance models with economic dependence. Mathematical methods of operations research 45 3 (1997) 411\u2013435.","DOI":"10.1007\/BF01194788"},{"key":"e_1_3_3_3_26_2","unstructured":"Zane Durante Qiuyuan Huang Naoki Wake Ran Gong Jae\u00a0Sung Park Bidipta Sarkar Rohan Taori Yusuke Noda Demetri Terzopoulos Yejin Choi et\u00a0al. 2024. Agent ai: Surveying the horizons of multimodal interaction. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.03568 (2024)."},{"key":"e_1_3_3_3_27_2","unstructured":"Nouha Dziri Ximing Lu Melanie Sclar Xiang\u00a0Lorraine Li Liwei Jiang Bill\u00a0Yuchen Lin Peter West Chandra Bhagavatula Ronan Le\u00a0Bras Jena\u00a0D Hwang et\u00a0al. 2023. Faith and fate: Limits of transformers on compositionality (2023). arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2305.18654 (2023)."},{"key":"e_1_3_3_3_28_2","doi-asserted-by":"publisher","DOI":"10.1145\/3706598.3713581"},{"key":"e_1_3_3_3_29_2","unstructured":"KJ Feng Kevin Pu Matt Latzke Tal August Pao Siangliulue Jonathan Bragg Daniel\u00a0S Weld Amy\u00a0X Zhang and Joseph\u00a0Chee Chang. 2024. Cocoa: Co-planning and co-execution with ai agents. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.10999 (2024)."},{"key":"e_1_3_3_3_30_2","unstructured":"Flowith. 2024. Introduction to Flow Mode. https:\/\/doc.flowith.io\/flow-mode-general-mode\/introduction-to-flow-mode. Accessed: July 2025."},{"key":"e_1_3_3_3_31_2","unstructured":"James Fodor. 2025. Line goes up? inherent limitations of benchmarks for evaluating large language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2502.14318 (2025)."},{"key":"e_1_3_3_3_32_2","unstructured":"Genspark. 2024. Understanding Genspark.AI. https:\/\/genspark.ai\/spark\/understanding-genspark-ai. Accessed: July 2025."},{"key":"e_1_3_3_3_33_2","volume-title":"Forty-second International Conference on Machine Learning","author":"Hahn Meera","year":"2025","unstructured":"Meera Hahn, Wenjun Zeng, Nithish Kannen, Rich Galt, Kartikeya Badola, Been Kim, and Zi Wang. 2025. Proactive Agents for Multi-Turn Text-to-Image Generation Under Uncertainty. In Forty-second International Conference on Machine Learning."},{"key":"e_1_3_3_3_34_2","doi-asserted-by":"crossref","unstructured":"Andreas Holzinger Kurt Zatloukal and Heimo M\u00fcller. 2025. Is human oversight to AI systems still possible? 59\u201362\u00a0pages.","DOI":"10.1016\/j.nbt.2024.12.003"},{"key":"e_1_3_3_3_35_2","doi-asserted-by":"publisher","DOI":"10.1145\/302979.303030"},{"key":"e_1_3_3_3_36_2","unstructured":"Siyuan Hu Mingyu Ouyang Difei Gao and Mike\u00a0Zheng Shou. 2024. The dawn of gui agent: A preliminary case study with claude 3.5 computer use. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2411.10323 (2024)."},{"key":"e_1_3_3_3_37_2","unstructured":"Xu Huang Weiwen Liu Xiaolong Chen Xingmei Wang Hao Wang Defu Lian Yasheng Wang Ruiming Tang and Enhong Chen. 2024. Understanding the planning of LLM agents: A survey. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.02716 (2024)."},{"key":"e_1_3_3_3_38_2","unstructured":"Faria Huq Jeffrey\u00a0P Bigham and Nikolas Martelaro. 2023. \" What\u2019s important here?\": Opportunities and Challenges of Using LLMs in Retrieving Information from Web Interfaces. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2312.06147 (2023)."},{"key":"e_1_3_3_3_39_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.naacl-demo.17"},{"key":"e_1_3_3_3_40_2","doi-asserted-by":"crossref","unstructured":"Deniz Iren and Semih Bilgen. 2014. Cost of quality in crowdsourcing. Human Computation 1 2 (2014).","DOI":"10.15346\/hc.v1i2.14"},{"key":"e_1_3_3_3_41_2","doi-asserted-by":"crossref","unstructured":"Ziwei Ji Nayeon Lee Rita Frieske Tiezheng Yu Dan Su Yan Xu Etsuko Ishii Ye\u00a0Jin Bang Andrea Madotto and Pascale Fung. 2023. Survey of hallucination in natural language generation. Comput. Surveys 55 12 (2023) 1\u201338.","DOI":"10.1145\/3571730"},{"key":"e_1_3_3_3_42_2","unstructured":"Liqiang Jing Zhehui Huang Xiaoyang Wang Wenlin Yao Wenhao Yu Kaixin Ma Hongming Zhang Xinya Du and Dong Yu. 2024. DSBench: How Far Are Data Science Agents from Becoming Data Science Experts? arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.07703 (2024)."},{"key":"e_1_3_3_3_43_2","unstructured":"Saurav Kadavath Tom Conerly Amanda Askell Tom Henighan Dawn Drain Ethan Perez Nicholas Schiefer Zac Hatfield-Dodds Nova DasSarma Eli Tran-Johnson et\u00a0al. 2022. Language models (mostly) know what they know. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2207.05221 (2022)."},{"key":"e_1_3_3_3_44_2","unstructured":"Sayash Kapoor Benedikt Stroebl Zachary\u00a0S Siegel Nitya Nadgir and Arvind Narayanan. 2024. Ai agents that matter. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2407.01502 (2024)."},{"key":"e_1_3_3_3_45_2","doi-asserted-by":"publisher","DOI":"10.1145\/3654777.3676345"},{"key":"e_1_3_3_3_46_2","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3502079"},{"key":"e_1_3_3_3_47_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3641915"},{"key":"e_1_3_3_3_48_2","doi-asserted-by":"crossref","unstructured":"John\u00a0D Lee and Katrina\u00a0A See. 2004. Trust in automation: Designing for appropriate reliance. Human factors 46 1 (2004) 50\u201380.","DOI":"10.1518\/hfes.46.1.50.30392"},{"key":"e_1_3_3_3_49_2","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445522"},{"key":"e_1_3_3_3_50_2","doi-asserted-by":"publisher","DOI":"10.1145\/3586182.3616671"},{"key":"e_1_3_3_3_51_2","doi-asserted-by":"publisher","DOI":"10.1145\/3706598.3714188"},{"key":"e_1_3_3_3_52_2","doi-asserted-by":"publisher","DOI":"10.1145\/3025453.3025483"},{"key":"e_1_3_3_3_53_2","doi-asserted-by":"publisher","DOI":"10.1145\/3379337.3415820"},{"key":"e_1_3_3_3_54_2","doi-asserted-by":"publisher","DOI":"10.1145\/3746059.3747622"},{"key":"e_1_3_3_3_55_2","doi-asserted-by":"publisher","DOI":"10.1145\/258549.258592"},{"key":"e_1_3_3_3_56_2","unstructured":"Jijia Liu Chao Yu Jiaxuan Gao Yuqing Xie Qingmin Liao Yi Wu and Yu Wang. 2023. Llm-powered hierarchical language agent for real-time human-ai coordination. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2312.15224 (2023)."},{"key":"e_1_3_3_3_57_2","unstructured":"Alexandra\u00a0Sasha Luccioni Sylvain Viguier and Anne-Laure Ligozat. 2023. Estimating the carbon footprint of bloom a 176b parameter language model. Journal of machine learning research 24 253 (2023) 1\u201315."},{"key":"e_1_3_3_3_58_2","unstructured":"Reuben Luera Ryan\u00a0A Rossi Alexa Siu Franck Dernoncourt Tong Yu Sungchul Kim Ruiyi Zhang Xiang Chen Hanieh Salehy Jian Zhao et\u00a0al. 2024. Survey of User Interface Design and Interaction Techniques in Generative AI Applications. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.22370 (2024) 1\u201342."},{"key":"e_1_3_3_3_59_2","doi-asserted-by":"crossref","unstructured":"Zeyao Ma Bohan Zhang Jing Zhang Jifan Yu Xiaokang Zhang Xiaohan Zhang Sijia Luo Xi Wang and Jie Tang. 2024. Spreadsheetbench: Towards challenging real world spreadsheet manipulation. Advances in Neural Information Processing Systems 37 (2024) 94871\u201394908.","DOI":"10.52202\/079017-3007"},{"key":"e_1_3_3_3_60_2","doi-asserted-by":"publisher","DOI":"10.5555\/212925.213004"},{"key":"e_1_3_3_3_61_2","unstructured":"Manus. 2024. Leave it to Manus: General AI Agent for Work and Life. https:\/\/manus.im\/home. Accessed: July 2025."},{"key":"e_1_3_3_3_62_2","unstructured":"Hussein Mozannar Gagan Bansal Cheng Tan Adam Fourney Victor Dibia Jingya Chen Jack Gerrits Tyler Payne Matheus\u00a0Kunzler Maldaner Madeleine Grunde-McLaughlin et\u00a0al. 2025. Magentic-ui: Towards human-in-the-loop agentic systems. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2507.22358 (2025)."},{"key":"e_1_3_3_3_63_2","volume-title":"Browser Use: Enable AI to control your browser","author":"M\u00fcller Magnus","year":"2024","unstructured":"Magnus M\u00fcller and Gregor \u017duni\u010d. 2024. Browser Use: Enable AI to control your browser. https:\/\/github.com\/browser-use\/browser-use"},{"key":"e_1_3_3_3_64_2","doi-asserted-by":"crossref","unstructured":"Dana\u00a0S Nau Tsz-Chiu Au Okhtay Ilghami Ugur Kuter J\u00a0William Murdock Dan Wu and Fusun Yaman. 2003. SHOP2: An HTN planning system. Journal of artificial intelligence research 20 (2003) 379\u2013404.","DOI":"10.1613\/jair.1141"},{"key":"e_1_3_3_3_65_2","unstructured":"OpenAI. 2025. Introducing ChatGPT agent: bridging research and action. https:\/\/openai.com\/index\/introducing-chatgpt-agent. Accessed: July 2025."},{"key":"e_1_3_3_3_66_2","doi-asserted-by":"publisher","DOI":"10.1609\/icaps.v34i1.31503"},{"key":"e_1_3_3_3_67_2","doi-asserted-by":"crossref","unstructured":"Raja Parasuraman and Dietrich\u00a0H Manzey. 2010. Complacency and bias in human use of automation: An attentional integration. Human factors 52 3 (2010) 381\u2013410.","DOI":"10.1177\/0018720810376055"},{"key":"e_1_3_3_3_68_2","doi-asserted-by":"crossref","unstructured":"David Patterson Joseph Gonzalez Urs H\u00f6lzle Quoc Le Chen Liang Lluis-Miquel Munguia Daniel Rothchild David\u00a0R So Maud Texier and Jeff Dean. 2022. The carbon footprint of machine learning training will plateau then shrink. Computer 55 7 (2022) 18\u201328.","DOI":"10.1109\/MC.2022.3148714"},{"key":"e_1_3_3_3_69_2","unstructured":"David Patterson Joseph Gonzalez Quoc Le Chen Liang Lluis-Miquel Munguia Daniel Rothchild David So Maud Texier and Jeff Dean. 2021. Carbon emissions and large neural network training. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2104.10350 (2021)."},{"key":"e_1_3_3_3_70_2","doi-asserted-by":"publisher","DOI":"10.1145\/3746059.3747797"},{"key":"e_1_3_3_3_71_2","doi-asserted-by":"publisher","DOI":"10.1145\/3706598.3713357"},{"key":"e_1_3_3_3_72_2","doi-asserted-by":"publisher","DOI":"10.1145\/3746059.3747770"},{"key":"e_1_3_3_3_73_2","unstructured":"Yujia Qin Yining Ye Junjie Fang Haoming Wang Shihao Liang Shizuo Tian Junda Zhang Jiahao Li Yunxin Li Shijue Huang et\u00a0al. 2025. Ui-tars: Pioneering automated gui interaction with native agents. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2501.12326 (2025)."},{"key":"e_1_3_3_3_74_2","doi-asserted-by":"publisher","DOI":"10.1145\/2858036.2858074"},{"key":"e_1_3_3_3_75_2","doi-asserted-by":"crossref","unstructured":"Philip Quinn and Andy Cockburn. 2020. Loss aversion and preferences in interaction. Human\u2013Computer Interaction 35 2 (2020) 143\u2013190.","DOI":"10.1080\/07370024.2018.1433040"},{"key":"e_1_3_3_3_76_2","unstructured":"rabbit\u00a0research team. 2023. Learning human actions on computer applications. https:\/\/rabbit.tech\/research"},{"key":"e_1_3_3_3_77_2","doi-asserted-by":"crossref","unstructured":"Timo Schick Jane Dwivedi-Yu Roberto Dess\u00ec Roberta Raileanu Maria Lomeli Eric Hambro Luke Zettlemoyer Nicola Cancedda and Thomas Scialom. 2023. Toolformer: Language models can teach themselves to use tools. Advances in Neural Information Processing Systems 36 (2023) 68539\u201368551.","DOI":"10.52202\/075280-2997"},{"key":"e_1_3_3_3_78_2","unstructured":"Yijia Shao Vinay Samuel Yucheng Jiang John Yang and Diyi Yang. 2024. Collaborative gym: A framework for enabling and evaluating human-agent collaboration. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.15701 (2024)."},{"key":"e_1_3_3_3_79_2","volume-title":"Goal Reasoning: Papers from the ACS Workshop","author":"Shivashankar Vikas","year":"2013","unstructured":"Vikas Shivashankar, Ron Alford, Ugur Kuter, and Dana\u00a0S Nau. 2013. Hierarchical goal networks and goal-driven autonomy: Going where ai planning meets goal reasoning. In Goal Reasoning: Papers from the ACS Workshop , Vol.\u00a095."},{"key":"e_1_3_3_3_80_2","doi-asserted-by":"crossref","unstructured":"Ben Shneiderman. 2020. Human-centered artificial intelligence: Reliable safe & trustworthy. International Journal of Human\u2013Computer Interaction 36 6 (2020) 495\u2013504.","DOI":"10.1080\/10447318.2020.1741118"},{"key":"e_1_3_3_3_81_2","doi-asserted-by":"crossref","unstructured":"Ben Shneiderman and Pattie Maes. 1997. Direct manipulation vs. interface agents. interactions 4 6 (1997) 42\u201361.","DOI":"10.1145\/267505.267514"},{"key":"e_1_3_3_3_82_2","doi-asserted-by":"crossref","unstructured":"Kaya Stechly Karthik Valmeekam and Subbarao Kambhampati. 2024. Chain of thoughtlessness? an analysis of cot in planning. Advances in Neural Information Processing Systems 37 (2024) 29106\u201329141.","DOI":"10.52202\/079017-0917"},{"key":"e_1_3_3_3_83_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642400"},{"key":"e_1_3_3_3_84_2","unstructured":"Theodore Sumers Shunyu Yao Karthik Narasimhan and Thomas Griffiths. 2023. Cognitive architectures for language agents. Transactions on Machine Learning Research (2023)."},{"key":"e_1_3_3_3_85_2","doi-asserted-by":"crossref","unstructured":"Sharareh Taghipour and Dragan Banjevic. 2012. Optimal inspection of a complex system subject to periodic and opportunistic inspections and preventive replacements. European Journal of Operational Research 220 3 (2012) 649\u2013660.","DOI":"10.1016\/j.ejor.2012.02.002"},{"key":"e_1_3_3_3_86_2","doi-asserted-by":"crossref","unstructured":"Mike Ten\u00a0Wolde and Adel\u00a0A Ghobbar. 2013. Optimizing inspection intervals\u2014Reliability and availability in terms of a cost model: A case study on railway carriers. Reliability Engineering & System Safety 114 (2013) 137\u2013147.","DOI":"10.1016\/j.ress.2012.12.013"},{"key":"e_1_3_3_3_87_2","doi-asserted-by":"crossref","unstructured":"David Tennenhouse. 2000. Proactive computing. Commun. ACM 43 5 (2000) 43\u201350.","DOI":"10.1145\/332833.332837"},{"key":"e_1_3_3_3_88_2","doi-asserted-by":"crossref","unstructured":"Sebastian Thrun. 2002. Probabilistic robotics. Commun. ACM 45 3 (2002) 52\u201357.","DOI":"10.1145\/504729.504754"},{"key":"e_1_3_3_3_89_2","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445497"},{"key":"e_1_3_3_3_90_2","unstructured":"Browser Use. 2024. The AI browser agent. https:\/\/browser-use.com. Accessed: July 2025."},{"key":"e_1_3_3_3_91_2","doi-asserted-by":"crossref","unstructured":"Karthik Valmeekam Matthew Marquez Alberto Olmo Sarath Sreedharan and Subbarao Kambhampati. 2023. Planbench: An extensible benchmark for evaluating large language models on planning and reasoning about change. Advances in Neural Information Processing Systems 36 (2023) 38975\u201338987.","DOI":"10.52202\/075280-1693"},{"key":"e_1_3_3_3_92_2","unstructured":"Karthik Valmeekam Matthew Marquez Sarath Sreedharan and Subbarao Kambhampati. 2024. On the planning abilities of large language models-a critical investigation. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_3_3_93_2","unstructured":"Guanzhi Wang Yuqi Xie Yunfan Jiang Ajay Mandlekar Chaowei Xiao Yuke Zhu Linxi Fan and Anima Anandkumar. 2023. Voyager: An Open-Ended Embodied Agent with Large Language Models. arxiv:https:\/\/arXiv.org\/abs\/2305.16291\u00a0[cs.AI]"},{"key":"e_1_3_3_3_94_2","doi-asserted-by":"crossref","unstructured":"Hongzhou Wang. 2002. A survey of maintenance policies of deteriorating systems. European journal of operational research 139 3 (2002) 469\u2013489.","DOI":"10.1016\/S0377-2217(01)00197-7"},{"key":"e_1_3_3_3_95_2","doi-asserted-by":"crossref","unstructured":"Lei Wang Chen Ma Xueyang Feng Zeyu Zhang Hao Yang Jingsen Zhang Zhiyuan Chen Jiakai Tang Xu Chen Yankai Lin et\u00a0al. 2024. A survey on large language model based autonomous agents. Frontiers of Computer Science 18 6 (2024) 186345.","DOI":"10.1007\/s11704-024-40231-1"},{"key":"e_1_3_3_3_96_2","doi-asserted-by":"crossref","unstructured":"Wenbin Wang. 2012. An overview of the recent advances in delay-time-based maintenance modelling. Reliability Engineering & System Safety 106 (2012) 165\u2013178.","DOI":"10.1016\/j.ress.2012.04.004"},{"key":"e_1_3_3_3_97_2","doi-asserted-by":"crossref","unstructured":"Xihuai Wang Shao Zhang Wenhao Zhang Wentao Dong Jingxiao Chen Ying Wen and Weinan Zhang. 2024. Zsc-eval: An evaluation toolkit and benchmark for multi-agent zero-shot coordination. Advances in Neural Information Processing Systems 37 (2024) 47344\u201347377.","DOI":"10.52202\/079017-1501"},{"key":"e_1_3_3_3_98_2","unstructured":"Zihao Wang Shaofei Cai Guanzhou Chen Anji Liu Xiaojian Ma and Yitao Liang. 2023. Describe Explain Plan and Select: Interactive Planning with Large Language Models Enables Open-World Multi-Task Agents. arxiv:https:\/\/arXiv.org\/abs\/2302.01560\u00a0[cs.AI]"},{"key":"e_1_3_3_3_99_2","unstructured":"Nicholas\u00a0R Waytowich Devin White MD Sunbeam and Vinicius\u00a0G Goecks. 2024. Atari-gpt: Investigating the capabilities of multimodal large language models as low-level policies for atari games. arXiv e-prints (2024) arXiv\u20132408."},{"key":"e_1_3_3_3_100_2","unstructured":"Jason Wei Zhiqing Sun Spencer Papay Scott McKinney Jeffrey Han Isa Fulford Hyung\u00a0Won Chung Alex\u00a0Tachard Passos William Fedus and Amelia Glaese. 2025. Browsecomp: A simple yet challenging benchmark for browsing agents. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2504.12516 (2025)."},{"key":"e_1_3_3_3_101_2","doi-asserted-by":"crossref","unstructured":"Jason Wei Xuezhi Wang Dale Schuurmans Maarten Bosma Fei Xia Ed Chi Quoc\u00a0V Le Denny Zhou et\u00a0al. 2022. Chain-of-thought prompting elicits reasoning in large language models. Advances in neural information processing systems 35 (2022) 24824\u201324837.","DOI":"10.52202\/068431-1800"},{"key":"e_1_3_3_3_102_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642466"},{"key":"e_1_3_3_3_103_2","unstructured":"Jian Xie Kai Zhang Jiangjie Chen Tinghui Zhu Renze Lou Yuandong Tian Yanghua Xiao and Yu Su. 2024. Travelplanner: A benchmark for real-world planning with language agents. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.01622 (2024)."},{"key":"e_1_3_3_3_104_2","doi-asserted-by":"crossref","unstructured":"Tianbao Xie Danyang Zhang Jixuan Chen Xiaochuan Li Siheng Zhao Ruisheng Cao Toh\u00a0J Hua Zhoujun Cheng Dongchan Shin Fangyu Lei et\u00a0al. 2024. Osworld: Benchmarking multimodal agents for open-ended tasks in real computer environments. Advances in Neural Information Processing Systems 37 (2024) 52040\u201352094.","DOI":"10.52202\/079017-1650"},{"key":"e_1_3_3_3_105_2","unstructured":"Frank\u00a0F. Xu Yufan Song Boxuan Li Yuxuan Tang Kritanjali Jain Mengxue Bao Zora\u00a0Z. Wang Xuhui Zhou Zhitong Guo Murong Cao Mingyang Yang Hao\u00a0Yang Lu Amaad Martin Zhe Su Leander Maben Raj Mehta Wayne Chi Lawrence Jang Yiqing Xie Shuyan Zhou and Graham Neubig. 2024. TheAgentCompany: Benchmarking LLM Agents on Consequential Real World Tasks. arxiv:https:\/\/arXiv.org\/abs\/2412.14161\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2412.14161"},{"key":"e_1_3_3_3_106_2","doi-asserted-by":"crossref","unstructured":"Shunyu Yao Howard Chen John Yang and Karthik Narasimhan. 2022. Webshop: Towards scalable real-world web interaction with grounded language agents. Advances in Neural Information Processing Systems 35 (2022) 20744\u201320757.","DOI":"10.52202\/068431-1508"},{"key":"e_1_3_3_3_107_2","volume-title":"International Conference on Learning Representations (ICLR)","author":"Yao Shunyu","year":"2023","unstructured":"Shunyu Yao, Jeffrey Zhao, Dian Yu, Nan Du, Izhak Shafran, Karthik Narasimhan, and Yuan Cao. 2023. React: Synergizing reasoning and acting in language models. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_3_3_108_2","unstructured":"Enhao Zhang Erkang Zhu Gagan Bansal Adam Fourney Hussein Mozannar and Jack Gerrits. 2025. Optimizing sequential multi-step tasks with parallel llm agents. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2507.08944 (2025)."},{"key":"e_1_3_3_3_109_2","unstructured":"Shao Zhang Xihuai Wang Wenhao Zhang Chaoran Li Junru Song Tingyu Li Lin Qiu Xuezhi Cao Xunliang Cai Wen Yao et\u00a0al. 2025. Leveraging dual process theory in language agent framework for real-time simultaneous human-AI collaboration. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2502.11882 (2025)."},{"key":"e_1_3_3_3_110_2","doi-asserted-by":"publisher","DOI":"10.1145\/3526113.3545708"},{"key":"e_1_3_3_3_111_2","unstructured":"Penghao Zhao Hailin Zhang Qinhan Yu Zhengren Wang Yunteng Geng Fangcheng Fu Ling Yang Wentao Zhang Jie Jiang and Bin Cui. 2024. Retrieval-augmented generation for ai-generated content: A survey. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.19473 (2024)."},{"key":"e_1_3_3_3_112_2","doi-asserted-by":"crossref","unstructured":"Yuheng Zhao Junjie Wang Linbin Xiang Xiaowen Zhang Zifei Guo Cagatay Turkay Yu Zhang and Siming Chen. 2024. Lightva: Lightweight visual analytics with llm agent-based task planning and execution. IEEE Transactions on Visualization and Computer Graphics (2024).","DOI":"10.1109\/TVCG.2024.3496112"},{"key":"e_1_3_3_3_113_2","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3501855"},{"key":"e_1_3_3_3_114_2","unstructured":"Jieyu Zhou. 2025. OrchVis: Hierarchical Multi-Agent Orchestration for Human Oversight. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2510.24937 (2025)."},{"key":"e_1_3_3_3_115_2","doi-asserted-by":"publisher","DOI":"10.1145\/3672539.3686326"},{"key":"e_1_3_3_3_116_2","unstructured":"Shuyan Zhou Frank\u00a0F Xu Hao Zhu Xuhui Zhou Robert Lo Abishek Sridhar Xianyi Cheng Tianyue Ou Yonatan Bisk Daniel Fried et\u00a0al. 2023. Webarena: A realistic web environment for building autonomous agents. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2307.13854 (2023)."},{"key":"e_1_3_3_3_117_2","doi-asserted-by":"crossref","unstructured":"Xiaojun Zhou Kaimin Huang Lifeng Xi and Jay Lee. 2015. Preventive maintenance modeling for multi-component systems with considering stochastic failures and disassembly sequence. Reliability Engineering & System Safety 142 (2015) 231\u2013237.","DOI":"10.1016\/j.ress.2015.05.005"}],"event":{"name":"CHI 2026: CHI Conference on Human Factors in Computing Systems","location":"Barcelona Spain","acronym":"CHI '26","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 2026 CHI Conference on Human Factors in Computing Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3772318.3790655","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,17]],"date-time":"2026-04-17T09:59:39Z","timestamp":1776419979000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3772318.3790655"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4,13]]},"references-count":116,"alternative-id":["10.1145\/3772318.3790655","10.1145\/3772318"],"URL":"https:\/\/doi.org\/10.1145\/3772318.3790655","relation":{},"subject":[],"published":{"date-parts":[[2026,4,13]]},"assertion":[{"value":"2026-04-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}