{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T07:57:36Z","timestamp":1776931056022,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":62,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,3,23]]},"DOI":"10.1145\/3742413.3789134","type":"proceedings-article","created":{"date-parts":[[2026,3,3]],"date-time":"2026-03-03T11:32:24Z","timestamp":1772537544000},"page":"456-473","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Improving Human Verification of LLM Reasoning through Interactive Explanation Interfaces"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1745-2148","authenticated-orcid":false,"given":"Runtao","family":"Zhou","sequence":"first","affiliation":[{"name":"University of Virginia, Charlottesville, Virginia, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-3845-103X","authenticated-orcid":false,"given":"Giang","family":"Nguyen","sequence":"additional","affiliation":[{"name":"GuideLabs, GuideLabs, San Francisco, California, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-7057-484X","authenticated-orcid":false,"given":"Nikita","family":"Kharya","sequence":"additional","affiliation":[{"name":"Independent Researcher, Charlottesville, Virginia, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0528-9416","authenticated-orcid":false,"given":"Anh","family":"Nguyen","sequence":"additional","affiliation":[{"name":"Computer Science and Software Engineering, Auburn University, Auburn, Alabama, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6354-7260","authenticated-orcid":false,"given":"Chirag","family":"Agarwal","sequence":"additional","affiliation":[{"name":"School of Data Science, University of Virginia, Charlottesville, Virginia, USA"}]}],"member":"320","published-online":{"date-parts":[[2026,3,22]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3174156"},{"key":"e_1_3_3_2_3_2","unstructured":"Chirag Agarwal Sree\u00a0Harsha Tanneru and Himabindu Lakkaraju. 2024. Faithfulness vs. plausibility: On the (un) reliability of explanations from large language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.04614 (2024)."},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"crossref","unstructured":"Gulsum Alicioglu and Bo Sun. 2022. A survey of visual analytics for explainable artificial intelligence methods. Computers & Graphics 102 (2022) 502\u2013520.","DOI":"10.1016\/j.cag.2021.09.002"},{"key":"e_1_3_3_2_5_2","doi-asserted-by":"publisher","DOI":"10.1145\/3708359.3712148"},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"publisher","DOI":"10.1145\/3640543.3645197"},{"key":"e_1_3_3_2_7_2","unstructured":"Anthropic. [n. d.]. Anthropic. https:\/\/assets.anthropic.com\/m\/785e231869ea8b3b\/original\/claude-3-7-sonnet-system-card.pdf"},{"key":"e_1_3_3_2_8_2","unstructured":"Wenhu Chen Xueguang Ma Xinyi Wang and William\u00a0W Cohen. 2022. Program of thoughts prompting: Disentangling computation from reasoning for numerical reasoning tasks. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2211.12588 (2022)."},{"key":"e_1_3_3_2_9_2","unstructured":"Karl Cobbe Vineet Kosaraju Mohammad Bavarian Mark Chen Heewoo Jun Lukasz Kaiser Matthias Plappert Jerry Tworek Jacob Hilton Reiichiro Nakano et\u00a0al. 2021. Training verifiers to solve math word problems. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2110.14168 (2021)."},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"publisher","DOI":"10.1109\/VL\/HCC60511.2024.00038"},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"crossref","unstructured":"Albert Fenteng. 2023. Online Learning: A Cognitive Tool for Learning an Alternative to Traditional Learning Style. Psychology 14 5 (2023) 676\u2013686.","DOI":"10.4236\/psych.2023.145036"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"crossref","unstructured":"Raymond Fok and Daniel\u00a0S Weld. 2024. In search of verifiability: Explanations rarely enable complementary performance in AI-advised decision making. AI Magazine 45 3 (2024) 317\u2013332.","DOI":"10.1002\/aaai.12182"},{"key":"e_1_3_3_2_13_2","unstructured":"Ali Forootani. 2025. A survey on mathematical reasoning and optimization with large language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2503.17726 (2025)."},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"crossref","unstructured":"Mouadh Guesmi Mohamed\u00a0Amine Chatti Shoeb Joarder Qurat\u00a0Ul Ain Rawaa Alatrash Clara Siepmann and Tannaz Vahidi. 2024. Interactive explanation with varying level of details in an explainable scientific literature recommender system. International Journal of Human\u2013Computer Interaction 40 22 (2024) 7248\u20137269.","DOI":"10.1080\/10447318.2023.2262797"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"publisher","DOI":"10.1145\/3706468.3706483"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"publisher","DOI":"10.1145\/3586183.3606737"},{"key":"e_1_3_3_2_17_2","unstructured":"Ryo Kamoi Sarkar Snigdha\u00a0Sarathi Das Renze Lou Jihyun\u00a0Janice Ahn Yilun Zhao Xiaoxin Lu Nan Zhang Yusen Zhang Ranran\u00a0Haoran Zhang Sujeeth\u00a0Reddy Vummanthala et\u00a0al. 2024. Evaluating LLMs at detecting errors in LLM responses. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.03602 (2024)."},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376219"},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"crossref","unstructured":"Hassan Khosravi Simon\u00a0Buckingham Shum Guanliang Chen Cristina Conati Yi-Shan Tsai Judy Kay Simon Knight Roberto Martinez-Maldonado Shazia Sadiq and Dragan Ga\u0161evi\u0107. 2022. Explainable artificial intelligence in education. Computers and education: artificial intelligence 3 (2022) 100074.","DOI":"10.1016\/j.caeai.2022.100074"},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"crossref","unstructured":"Byung\u00a0Hyung Kim Seunghun Koh Sejoon Huh Sungho Jo and Sunghee Choi. 2020. Improved explanatory efficacy on human affect and workload through interactive process in artificial intelligence. IEEE Access 8 (2020) 189013\u2013189024.","DOI":"10.1109\/ACCESS.2020.3032056"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"crossref","unstructured":"Jenia Kim Henry Maathuis and Danielle Sent. 2024. Human-centered evaluation of explainable AI applications: a systematic review. Frontiers in Artificial Intelligence 7 (2024) 1456486.","DOI":"10.3389\/frai.2024.1456486"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3581001"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"crossref","unstructured":"Takeshi Kojima Shixiang\u00a0Shane Gu Machel Reid Yutaka Matsuo and Yusuke Iwasawa. 2022. Large language models are zero-shot reasoners. Advances in neural information processing systems 35 (2022) 22199\u201322213.","DOI":"10.52202\/068431-1613"},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"crossref","unstructured":"Rhonda\u00a0G Kost and Joel\u00a0Correa da Rosa. 2018. Impact of survey length and compensation on validity reliability and sample characteristics for ultrashort- short- and long-research participant perception surveys. Journal of clinical and translational science 2 1 (2018) 31\u201337.","DOI":"10.1017\/cts.2018.18"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"crossref","unstructured":"Jon\u00a0A Krosnick. 1991. Response strategies for coping with the cognitive demands of attitude measures in surveys. Applied cognitive psychology 5 3 (1991) 213\u2013236.","DOI":"10.1002\/acp.2350050305"},{"key":"e_1_3_3_2_26_2","unstructured":"Jenny Kunz and Marco Kuhlmann. 2024. Properties and challenges of llm-generated explanations. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.10532 (2024)."},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"publisher","DOI":"10.1609\/hcomp.v7i1.5280"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"publisher","DOI":"10.1145\/3306618.3314229"},{"key":"e_1_3_3_2_29_2","unstructured":"Khai Le-Duc Duy\u00a0MH Nguyen Phuong\u00a0TH Trinh Tien-Phat Nguyen Nghiem\u00a0T Diep An Ngo Tung Vu Trinh Vuong Anh-Tien Nguyen Mau Nguyen et\u00a0al. 2025. S-chain: Structured visual chain-of-thought for medicine. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2510.22728 (2025)."},{"key":"e_1_3_3_2_30_2","unstructured":"Xiaoyuan Li Wenjie Wang Moxin Li Junrong Guo Yang Zhang and Fuli Feng. 2024. Evaluating mathematical reasoning of large language models: A focus on error identification and correction. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.00755 (2024)."},{"key":"e_1_3_3_2_31_2","unstructured":"Q\u00a0Vera Liao and Kush\u00a0R Varshney. 2021. Human-centered explainable ai (xai): From algorithms to user experiences. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2110.10790 (2021)."},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"crossref","unstructured":"Rhema Linder Sina Mohseni Fan Yang Shiva\u00a0K Pentyala Eric\u00a0D Ragan and Xia\u00a0Ben Hu. 2021. How level of explanation detail affects human performance in interpretable intelligent systems: A study on explainable fact checking. Applied AI Letters 2 4 (2021) e49.","DOI":"10.1002\/ail2.49"},{"key":"e_1_3_3_2_33_2","doi-asserted-by":"crossref","unstructured":"Tim Miller. 2019. Explanation in artificial intelligence: Insights from the social sciences. Artificial intelligence 267 (2019) 1\u201338.","DOI":"10.1016\/j.artint.2018.07.007"},{"key":"e_1_3_3_2_34_2","doi-asserted-by":"crossref","unstructured":"Venkatesh Mishra Bimsara Pathiraja Mihir Parmar Sat Chidananda Jayanth Srinivasa Gaowen Liu Ali Payani and Chitta Baral. 2025. Investigating the Shortcomings of LLMs in Step-by-Step Legal Reasoning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2502.05675 (2025).","DOI":"10.18653\/v1\/2025.findings-naacl.435"},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"crossref","unstructured":"Deepa Muralidhar Rafik Belloum and Ashwin Ashok. 2025. Operationalizing selective transparency using progressive disclosure in artificial intelligence clinical diagnosis systems. International Journal of Human-Computer Studies (2025) 103591.","DOI":"10.1016\/j.ijhcs.2025.103591"},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-42280-5_21"},{"key":"e_1_3_3_2_37_2","unstructured":"Menaka Narayanan Emily Chen Jeffrey He Been Kim Sam Gershman and Finale Doshi-Velez. 2018. How do humans understand explanations from machine learning systems? an evaluation of the human-interpretability of explanation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1802.00682 (2018)."},{"key":"e_1_3_3_2_38_2","doi-asserted-by":"crossref","unstructured":"Sania Nayab Giulio Rossolini Marco Simoni Andrea Saracino Giorgio Buttazzo Nicolamaria Manes and Fabrizio Giacomelli. 2024. Concise thoughts: Impact of output length on llm reasoning and cost. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2407.19825 (2024).","DOI":"10.2139\/ssrn.5293076"},{"key":"e_1_3_3_2_39_2","unstructured":"Giang Nguyen Ivan Brugere Shubham Sharma Sanjay Kariyappa Anh\u00a0Totti Nguyen and Freddy Lecue. 2024. Interpretable llm-based table question answering. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.12386 (2024)."},{"key":"e_1_3_3_2_40_2","first-page":"8169","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Nguyen Giang","year":"2024","unstructured":"Giang Nguyen, Mohammad\u00a0Reza Taesiri, Sunnie\u00a0SY Kim, and Anh Nguyen. 2024. Allowing humans to interactively guide machines where to look does not always improve human-AI team\u2019s classification accuracy. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 8169\u20138175."},{"key":"e_1_3_3_2_41_2","unstructured":"Tin Nguyen Logan Bolton Mohammad\u00a0Reza Taesiri and Anh\u00a0Totti Nguyen. 2025. HoT: Highlighted Chain of Thought for Referencing Supporting Facts from Inputs. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2503.02003 (2025)."},{"key":"e_1_3_3_2_42_2","unstructured":"Mahsan Nourani Chiradeep Roy Tahrima Rahman Eric\u00a0D Ragan Nicholas Ruozzi and Vibhav Gogate. 2020. Don\u2019t explain without verifying veracity: an evaluation of explainable ai with video activity recognition. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2005.02335 (2020)."},{"key":"e_1_3_3_2_43_2","doi-asserted-by":"publisher","DOI":"10.1145\/3490099.3511140"},{"key":"e_1_3_3_2_44_2","unstructured":"Suleyman Ozdel Can Sarpkaya Efe Bozkir Hong Gao and Enkelejda Kasneci. 2025. Examining the Role of LLM-Driven Interactions on Attention and Cognitive Engagement in Virtual Classrooms. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2505.07377 (2025)."},{"key":"e_1_3_3_2_45_2","doi-asserted-by":"crossref","unstructured":"Vagelis Plevris George Papazafeiropoulos and Alejandro Jim\u00e9nez\u00a0Rios. 2023. Chatbots put to the test in math and logic problems: A comparison and assessment of ChatGPT-3.5 ChatGPT-4 and Google Bard. Ai 4 4 (2023) 949\u2013969.","DOI":"10.3390\/ai4040048"},{"key":"e_1_3_3_2_46_2","doi-asserted-by":"crossref","unstructured":"Yao Rong Tobias Leemann Thai-Trang Nguyen Lisa Fiedler Peizhu Qian Vaibhav Unhelkar Tina Seidel Gjergji Kasneci and Enkelejda Kasneci. 2023. Towards human-centered explainable ai: A survey of user studies for model explanations. IEEE transactions on pattern analysis and machine intelligence 46 4 (2023) 2104\u20132122.","DOI":"10.1109\/TPAMI.2023.3331846"},{"key":"e_1_3_3_2_47_2","unstructured":"Frank Spillers. 2010. Progressive disclosure."},{"key":"e_1_3_3_2_48_2","unstructured":"Aaron Springer and Steve Whittaker. 2018. Progressive disclosure: designing for effective transparency. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1811.02164 (2018)."},{"key":"e_1_3_3_2_49_2","doi-asserted-by":"crossref","unstructured":"John Sweller. 1988. Cognitive load during problem solving: Effects on learning. Cognitive science 12 2 (1988) 257\u2013285.","DOI":"10.1207\/s15516709cog1202_4"},{"key":"e_1_3_3_2_50_2","doi-asserted-by":"crossref","unstructured":"Stefano Teso \u00d6znur Alkan Wolfgang Stammer and Elizabeth Daly. 2023. Leveraging explanations in interactive machine learning: An overview. Frontiers in Artificial Intelligence 6 (2023) 1066049.","DOI":"10.3389\/frai.2023.1066049"},{"key":"e_1_3_3_2_51_2","doi-asserted-by":"crossref","unstructured":"Miles Turpin Julian Michael Ethan Perez and Samuel Bowman. 2023. Language models don\u2019t always say what they think: Unfaithful explanations in chain-of-thought prompting. Advances in Neural Information Processing Systems 36 (2023) 74952\u201374965.","DOI":"10.52202\/075280-3275"},{"key":"e_1_3_3_2_52_2","unstructured":"Boshi Wang Sewon Min Xiang Deng Jiaming Shen You Wu Luke Zettlemoyer and Huan Sun. 2022. Towards understanding chain-of-thought prompting: An empirical study of what matters. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2212.10001 (2022)."},{"key":"e_1_3_3_2_53_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.153"},{"key":"e_1_3_3_2_54_2","unstructured":"Shen Wang Tianlong Xu Hang Li Chaoli Zhang Joleen Liang Jiliang Tang Philip\u00a0S Yu and Qingsong Wen. 2024. Large language models for education: A survey and outlook. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.18105 (2024)."},{"key":"e_1_3_3_2_55_2","doi-asserted-by":"crossref","unstructured":"Xuezhi Wang and Denny Zhou. 2024. Chain-of-thought reasoning without prompting. Advances in Neural Information Processing Systems 37 (2024) 66383\u201366409.","DOI":"10.52202\/079017-2123"},{"key":"e_1_3_3_2_56_2","doi-asserted-by":"crossref","unstructured":"Jason Wei Xuezhi Wang Dale Schuurmans Maarten Bosma Fei Xia Ed Chi Quoc\u00a0V Le Denny Zhou et\u00a0al. 2022. Chain-of-thought prompting elicits reasoning in large language models. Advances in neural information processing systems 35 (2022) 24824\u201324837.","DOI":"10.52202\/068431-1800"},{"key":"e_1_3_3_2_57_2","doi-asserted-by":"publisher","DOI":"10.1145\/3377325.3377480"},{"key":"e_1_3_3_2_58_2","unstructured":"Yuhao Zhang Jiaxin An Ben Wang Yan Zhang and Jiqun Liu. 2025. Human-Centered Explainability in Interactive Information Systems: A Survey. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2507.02300 (2025)."},{"key":"e_1_3_3_2_59_2","unstructured":"Zhuosheng Zhang Aston Zhang Mu Li and Alex Smola. 2022. Automatic chain of thought prompting in large language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2210.03493 (2022)."},{"key":"e_1_3_3_2_60_2","doi-asserted-by":"crossref","unstructured":"Haiyan Zhao Hanjie Chen Fan Yang Ninghao Liu Huiqi Deng Hengyi Cai Shuaiqiang Wang Dawei Yin and Mengnan Du. 2024. Explainability for large language models: A survey. ACM Transactions on Intelligent Systems and Technology 15 2 (2024) 1\u201338.","DOI":"10.1145\/3639372"},{"key":"e_1_3_3_2_61_2","doi-asserted-by":"crossref","unstructured":"Leming Zhou Jie Bao I\u00a0Made\u00a0Agus Setiawan Andi Saptono and Bambang Parmanto. 2019. The mHealth app usability questionnaire (MAUQ): development and validation study. JMIR mHealth and uHealth 7 4 (2019) e11500.","DOI":"10.2196\/11500"},{"key":"e_1_3_3_2_62_2","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3581266"},{"key":"e_1_3_3_2_63_2","unstructured":"Joerg Zumbach. 2006. Cognitive overhead in hypertext learning reexamined: Overcoming the myths. Journal of Educational Multimedia and Hypermedia 15 4 (2006) 411\u2013432."}],"event":{"name":"IUI '26: 31st International Conference on Intelligent User Interfaces","location":"Paphos Cyprus","acronym":"IUI '26","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction","SIGAI ACM Special Interest Group on Artificial Intelligence"]},"container-title":["Proceedings of the 31st International Conference on Intelligent User Interfaces"],"original-title":[],"deposited":{"date-parts":[[2026,3,14]],"date-time":"2026-03-14T12:58:26Z","timestamp":1773493106000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3742413.3789134"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,22]]},"references-count":62,"alternative-id":["10.1145\/3742413.3789134","10.1145\/3742413"],"URL":"https:\/\/doi.org\/10.1145\/3742413.3789134","relation":{},"subject":[],"published":{"date-parts":[[2026,3,22]]},"assertion":[{"value":"2026-03-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}