{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,13]],"date-time":"2026-06-13T07:04:30Z","timestamp":1781334270021,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":67,"publisher":"ACM","funder":[{"name":"This work was supported in part by the National Science Foundation awards","award":["# 1925043 and 2330040"],"award-info":[{"award-number":["# 1925043 and 2330040"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,3,16]]},"DOI":"10.1145\/3757279.3785550","type":"proceedings-article","created":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T00:27:38Z","timestamp":1773102458000},"page":"914-923","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["RoboCritics: Enabling Reliable End-to-End LLM Robot Programming through Expert-Informed Critics"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-4195-8317","authenticated-orcid":false,"given":"Callie Y.","family":"Kim","sequence":"first","affiliation":[{"name":"University of Wisconsin-Madison, Madison, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-9414-9647","authenticated-orcid":false,"given":"Nathan Thomas","family":"White","sequence":"additional","affiliation":[{"name":"University of Wisconsin-Madison, Madison, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-6663-8755","authenticated-orcid":false,"given":"Evan","family":"He","sequence":"additional","affiliation":[{"name":"University of Wisconsin-Madison, Madison, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0379-2827","authenticated-orcid":false,"given":"Frederic","family":"Sala","sequence":"additional","affiliation":[{"name":"University of Wisconsin-Madison, Madison, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9456-1495","authenticated-orcid":false,"given":"Bilge","family":"Mutlu","sequence":"additional","affiliation":[{"name":"University of Wisconsin-Madison, Madison, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,3,16]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"Michael Ahn Anthony Brohan Noah Brown Yevgen Chebotar Omar Cortes Byron David Chelsea Finn Chuyuan Fu Keerthana Gopalakrishnan Karol Hausman Alex Herzog Daniel Ho Jasmine Hsu Julian Ibarz Brian Ichter Alex Irpan Eric Jang Rosario Jauregui Ruano Kyle Jeffrey Sally Jesmonth Nikhil J Joshi Ryan Julian Dmitry Kalashnikov Yuheng Kuang Kuang-Huei Lee Sergey Levine Yao Lu Linda Luu Carolina Parada Peter Pastor Jornell Quiambao Kanishka Rao Jarek Rettinghouse Diego Reyes Pierre Sermanet Nicolas Sievers Clayton Tan Alexander Toshev Vincent Vanhoucke Fei Xia Ted Xiao Peng Xu Sichun Xu Mengyuan Yan and Andy Zeng. 2022. Do As I Can Not As I Say: Grounding Language in Robotic Affordances. arxiv:2204.01691. arxiv:2204.01691"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3371382.3378300"},{"key":"e_1_3_2_2_3_1","unstructured":"Ben Athiwaratkun Sanjay Krishna Gouda Zijian Wang Xiaopeng Li Yuchen Tian Ming Tan Wasi Uddin Ahmad Shiqi Wang Qing Sun Mingyue Shang Sujan Kumar Gonugondla Hantian Ding Varun Kumar Nathan Fulton Arash Farahani Siddhartha Jain Robert Giaquinto Haifeng Qian Murali Krishna Ramanathan Ramesh Nallapati Baishakhi Ray Parminder Bhatia Sudipta Sengupta Dan Roth and Bing Xiang. 2023. Multi-lingual Evaluation of Code Generation Models. arxiv:2210.14868. arxiv:2210.14868"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3586030"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3589996"},{"key":"e_1_3_2_2_6_1","unstructured":"John Brooke et al. 1996. SUS-A quick and dirty usability scale. Usability evaluation in industry 189 194 (1996) 4\u20137."},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1108\/01439910510629244"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaaiss.v2i1.27721"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1561\/2300000066"},{"key":"e_1_3_2_2_10_1","volume-title":"Encyclopedia of critical psychology","author":"Clarke Victoria","year":"1947","unstructured":"Victoria Clarke and Virginia Braun. 2014. Thematic analysis. In Encyclopedia of critical psychology. Springer, 1947\u20131952."},{"key":"e_1_3_2_2_11_1","volume-title":"Michael Aaron Peshkin, and James Edward Colgate","author":"Colgate Ed","year":"2008","unstructured":"Ed Colgate, Antonio Bicchi, Michael Aaron Peshkin, and James Edward Colgate. 2008. Safety for physical human-robot interaction. In Springer handbook of robotics. Springer, 1335\u20131348."},{"key":"e_1_3_2_2_12_1","unstructured":"Danny Driess Fei Xia Mehdi S. M. Sajjadi Corey Lynch Aakanksha Chowdhery Brian Ichter Ayzaan Wahid Jonathan Tompson Quan Vuong Tianhe Yu Wenlong Huang Yevgen Chebotar Pierre Sermanet Daniel Duckworth Sergey Levine Vincent Vanhoucke Karol Hausman Marc Toussaint Klaus Greff Andy Zeng Igor Mordatch and Pete Florence. 2023. PaLM-E: An Embodied Multimodal Language Model. arxiv:2303.03378."},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3610978.3640653"},{"key":"e_1_3_2_2_14_1","volume-title":"CRITIC: Large Language Models Can Self-Correct with Tool-Interactive Critiquing. arxiv:2305.11738. arxiv:2305.11738","author":"Gou Zhibin","year":"2024","unstructured":"Zhibin Gou, Zhihong Shao, Yeyun Gong, Yelong Shen, Yujiu Yang, Nan Duan, and Weizhu Chen. 2024. CRITIC: Large Language Models Can Self-Correct with Tool-Interactive Critiquing. arxiv:2305.11738. arxiv:2305.11738"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2017.04.004"},{"key":"e_1_3_2_2_16_1","volume-title":"Advances in psychology. 52","author":"Hart Sandra G","unstructured":"Sandra G Hart and Lowell E Staveland. 1988. Development of NASA-TLX (Task Load Index): Results of empirical and theoretical research. In Advances in psychology. 52, Elsevier, 139\u2013183."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/2909824.3020215"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/HRI61500.2025.10974232"},{"key":"e_1_3_2_2_19_1","unstructured":"Abhinav Jain Chris Jermaine and Vaibhav Unhelkar. 2024. RAG-Modulo: Solving Sequential Tasks using Experience Critics and Language Models. arxiv:2409.12294. arxiv:2409.12294"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3571730"},{"key":"e_1_3_2_2_21_1","unstructured":"Juyong Jiang Fan Wang Jiasi Shen Sungju Kim and Sunghun Kim. 2024. A Survey on Large Language Models for Code Generation. arXiv preprint arXiv:2406.00515."},{"key":"e_1_3_2_2_22_1","unstructured":"Subbarao Kambhampati Karthik Valmeekam Lin Guan Mudit Verma Kaya Stechly Siddhant Bhambri Lucas Saldyt and Anil Murthy. 2024. LLMs Can\u2019t Plan But Can Help Planning in LLM-Modulo Frameworks. arxiv:2402.01817. arxiv:2402.01817"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS58592.2024.10802322"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3610977.3634969"},{"key":"e_1_3_2_2_25_1","volume-title":"Sergio Aguilera, Rui Zhang, Jie Ding, Seth Hutchinson, and Ali Anwar.","author":"Khan Azal Ahmad","year":"2025","unstructured":"Azal Ahmad Khan, Michael Andrev, Muhammad Ali Murtaza, Sergio Aguilera, Rui Zhang, Jie Ding, Seth Hutchinson, and Ali Anwar. 2025. Safety Aware Task Planning via Large Language Models in Robotics. arxiv:2503.15707. arxiv:2503.15707"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3610977.3634966"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11370-024-00550-5"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","unstructured":"Krishna Kodur Manizheh Zand Matthew Tognotti Cinthya Jauregui and Maria Kyrarini. 2023. Structured and Unstructured Speech2Action Frameworks for Human-Robot Collaboration: A User Study. Aug. https:\/\/doi.org\/10.36227\/techrxiv.24022452.v1 10.36227\/techrxiv.24022452.v1","DOI":"10.36227\/techrxiv.24022452.v1"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3706598.3714113"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/RO-MAN46459.2019.8956327"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2024.3415931"},{"key":"e_1_3_2_2_32_1","volume-title":"Measuring usability with the use questionnaire12. Usability interface, 8, 2","author":"Lund Arnold M","year":"2001","unstructured":"Arnold M Lund. 2001. Measuring usability with the use questionnaire12. Usability interface, 8, 2 (2001), 3\u20136."},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3610977.3634999"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/HRI61500.2025.10974179"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3359174"},{"key":"e_1_3_2_2_36_1","unstructured":"Christopher E. Mower Yuhui Wan Hongzhan Yu Antoine Grosnit Jonas Gonzalez-Billandon Matthieu Zimmer Jinlong Wang Xinyu Zhang Yao Zhao Anbang Zhai Puze Liu Daniel Palenicek Davide Tateo Cesar Cadena Marco Hutter Jan Peters Guangjian Tian Yuzheng Zhuang Kun Shao Xingyue Quan Jianye Hao Jun Wang and Haitham Bou-Ammar. 2024. ROS-LLM: A ROS framework for embodied AI with task feedback and structured reasoning. arxiv:2406.19741. arxiv:2406.19741"},{"key":"e_1_3_2_2_37_1","unstructured":"Ike Obi Vishnunandan L. N. Venkatesh Weizheng Wang Ruiqi Wang Dayoon Suh Temitope I. Amosa Wonse Jo and Byung-Cheol Min. 2025. SafePlan: Leveraging Formal Logic and Chain-of-Thought Reasoning for Enhanced Safety in LLM-based Robotic Task Planning. arxiv:2503.06892. arxiv:2503.06892"},{"key":"e_1_3_2_2_38_1","unstructured":"OpenAI. 2024. Hello GPT-4 Turbo. https:\/\/openai.com\/index\/hello-gpt-4o\/ Accessed: 2024-12-18"},{"key":"e_1_3_2_2_39_1","unstructured":"OpenAI. 2024. New and Improved Embedding Model. https:\/\/openai.com\/index\/new-and-improved-embedding-model\/ Accessed: 2024-12-18"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/ROMAN.2016.7745110"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3654777.3676401"},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/2701973.2702007"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3332165.3347957"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3610977.3634974"},{"key":"e_1_3_2_2_45_1","unstructured":"Behrad Rabiei Mahesh Kumar A. R. Zhirui Dai Surya L. S. R. Pilla Qiyue Dong and Nikolay Atanasov. 2025. LTLCodeGen: Code Generation of Syntactically Correct Temporal Logic for Robot Task Planning. arxiv:2503.07902. arxiv:2503.07902"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3610977.3637477"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3568162.3576982"},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/HRI53351.2022.9889345"},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"publisher","DOI":"10.1177\/0278364919884623"},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"publisher","DOI":"10.1177\/2158244020958736"},{"key":"e_1_3_2_2_51_1","unstructured":"Ishika Singh Valts Blukis Arsalan Mousavian Ankit Goyal Danfei Xu Jonathan Tremblay Dieter Fox Jesse Thomason and Animesh Garg. 2022. ProgPrompt: Generating Situated Robot Task Plans using Large Language Models. arxiv:2209.11302. arxiv:2209.11302"},{"key":"e_1_3_2_2_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00280"},{"key":"e_1_3_2_2_53_1","unstructured":"Kaya Stechly Matthew Marquez and Subbarao Kambhampati. 2023. GPT-4 Doesn\u2019t Know It\u2019s Wrong: An Analysis of Iterative Prompting for Reasoning Problems. arxiv:2310.12397. arxiv:2310.12397"},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3610978.3640644"},{"key":"e_1_3_2_2_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/3708359.3712091"},{"key":"e_1_3_2_2_56_1","volume-title":"Advances in Neural Information Processing Systems","author":"Valmeekam Karthik","year":"2023","unstructured":"Karthik Valmeekam, Matthew Marquez, Sarath Sreedharan, and Subbarao Kambhampati. 2023. On the Planning Abilities of Large Language Models - A Critical Investigation. In Advances in Neural Information Processing Systems, A. Oh, T. Naumann, A. Globerson, K. Saenko, M. Hardt, and S. Levine (Eds.). 36, Curran Associates, Inc., 75993\u201376005. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2023\/file\/efb2072a358cefb75886a315a6fcf880-Paper-Conference.pdf"},{"key":"e_1_3_2_2_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2013.6630576"},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2024.3387941"},{"key":"e_1_3_2_2_59_1","unstructured":"Koki Wataoka Tsubasa Takahashi and Ryokan Ri. 2025. Self-Preference Bias in LLM-as-a-Judge. arxiv:2410.21819. arxiv:2410.21819"},{"key":"e_1_3_2_2_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/ETFA46521.2020.9212036"},{"key":"e_1_3_2_2_61_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.826"},{"key":"e_1_3_2_2_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10611447"},{"key":"e_1_3_2_2_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2023.3282111"},{"key":"e_1_3_2_2_64_1","unstructured":"Burak Yeti\u015ftiren I\u015f\u0131k \u00d6zsoy Miray Ayerdem and Eray T\u00fcz\u00fcn. 2023. Evaluating the Code Quality of AI-Assisted Code Generation Tools: An Empirical Study on GitHub Copilot Amazon CodeWhisperer and ChatGPT. arxiv:2304.10778. arxiv:2304.10778"},{"key":"e_1_3_2_2_65_1","volume-title":"Hao-Tien Lewis Chiang, Tom Erez, Leonard Hasenclever, Jan Humplik, Brian Ichter, Ted Xiao, Peng Xu, Andy Zeng, Tingnan Zhang, Nicolas Heess, Dorsa Sadigh, Jie Tan, Yuval Tassa, and Fei Xia.","author":"Yu Wenhao","year":"2023","unstructured":"Wenhao Yu, Nimrod Gileadi, Chuyuan Fu, Sean Kirmani, Kuang-Huei Lee, Montse Gonzalez Arenas, Hao-Tien Lewis Chiang, Tom Erez, Leonard Hasenclever, Jan Humplik, Brian Ichter, Ted Xiao, Peng Xu, Andy Zeng, Tingnan Zhang, Nicolas Heess, Dorsa Sadigh, Jie Tan, Yuval Tassa, and Fei Xia. 2023. Language to Rewards for Robotic Skill Synthesis. arxiv:2306.08647. arxiv:2306.08647"},{"key":"e_1_3_2_2_66_1","doi-asserted-by":"publisher","unstructured":"Alex Wuqi Zhang Rafael Queiroz and Sarah Sebo. 2025. Balancing User Control and Perceived Robot Social Agency Through the Design of End-User Robot Programming Interfaces. In 2025 20th ACM\/IEEE International Conference on Human-Robot Interaction (HRI). 899\u2013908. https:\/\/doi.org\/10.1109\/HRI61500.2025.10974063 10.1109\/HRI61500.2025.10974063","DOI":"10.1109\/HRI61500.2025.10974063"},{"key":"e_1_3_2_2_67_1","doi-asserted-by":"publisher","DOI":"10.1109\/TII.2016.2625818"}],"event":{"name":"HRI '26: 21st ACM\/IEEE International Conference on Human-Robot Interaction","location":"Edinburgh Scotland UK","acronym":"HRI '26","sponsor":["SIGAI ACM Special Interest Group on Artificial Intelligence","SIGCHI ACM Special Interest Group on Computer-Human Interaction","IEEE RAS"]},"container-title":["Proceedings of the 21st ACM\/IEEE International Conference on Human-Robot Interaction"],"original-title":[],"deposited":{"date-parts":[[2026,3,15]],"date-time":"2026-03-15T00:34:50Z","timestamp":1773534890000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3757279.3785550"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,16]]},"references-count":67,"alternative-id":["10.1145\/3757279.3785550","10.1145\/3757279"],"URL":"https:\/\/doi.org\/10.1145\/3757279.3785550","relation":{},"subject":[],"published":{"date-parts":[[2026,3,16]]},"assertion":[{"value":"2026-03-16","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}