{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T08:00:50Z","timestamp":1776931250406,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":74,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62502436"],"award-info":[{"award-number":["62502436"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Zhejiang Provincial Natural Science Foundation of China","award":["LMS26F020004"],"award-info":[{"award-number":["LMS26F020004"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,4,13]]},"DOI":"10.1145\/3772318.3791059","type":"proceedings-article","created":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T06:44:11Z","timestamp":1776062651000},"page":"1-19","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Seeing Eye to Eye: Enabling Cognitive Alignment Through Shared First-Person Perspective in Human\u2013AI Collaboration"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-9276-6101","authenticated-orcid":false,"given":"Zhuyu","family":"Teng","sequence":"first","affiliation":[{"name":"College of Computer Science and Technology, Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0962-6459","authenticated-orcid":false,"given":"Pei","family":"Chen","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-5422-8064","authenticated-orcid":false,"given":"Yichen","family":"Cai","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-0534-6202","authenticated-orcid":false,"given":"Ruoqing","family":"Lu","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-7801-4279","authenticated-orcid":false,"given":"Zhaoqu","family":"Jiang","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-1270-6445","authenticated-orcid":false,"given":"Jiayang","family":"Li","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9625-5547","authenticated-orcid":false,"given":"Weitao","family":"You","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5561-0493","authenticated-orcid":false,"given":"Lingyun","family":"Sun","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2026,4,13]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"crossref","unstructured":"Robert\u00a0W Andrews J\u00a0Mason Lilly Divya Srivastava and Karen\u00a0M Feigh. 2023. The role of shared mental models in human-AI teams: a theoretical review. Theoretical Issues in Ergonomics Science 24 2 (2023) 129\u2013175.","DOI":"10.1080\/1463922X.2022.2061080"},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.1145\/3025453.3026033"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"crossref","unstructured":"Markus Appel Nina Krisch Jan-Philipp Stein and Silvana Weber. 2019. Smartphone zombies! Pedestrians\u2019 distracted walking as a function of their fear of missing out. Journal of Environmental Psychology 63 (2019) 130\u2013133.","DOI":"10.1016\/j.jenvp.2019.04.003"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"crossref","unstructured":"Riku Arakawa Jill\u00a0Fain Lehman and Mayank Goel. 2024. Prism-q&a: Step-aware voice assistant on a smartwatch enabled by multimodal procedure tracking and large language models. Proceedings of the ACM on Interactive Mobile Wearable and Ubiquitous Technologies 8 4 (2024) 1\u201326.","DOI":"10.1145\/3699759"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2015.89"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642242"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"crossref","unstructured":"Reimer Bierhals Ilona Schuster P Kohler and Petra Badke-Schaub. 2007. Shared mental models\u2014linking team cognition and performance. CoDesign 3 1 (2007) 75\u201394.","DOI":"10.1080\/15710880601170891"},{"key":"e_1_3_3_1_9_2","unstructured":"Frank Biocca and Chad Harms. 2003. Networked Minds Social Presence Inventory:|(Scales only Version 1.2) Measures of co-presence social presence subjective symmetry and intersubjective symmetry. (2003)."},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-10-5251-4_103"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2016.XII.034"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1145\/3706598.3713953"},{"key":"e_1_3_3_1_13_2","unstructured":"Janis\u00a0A Cannon-Bowers Eduardo Salas and Sharolyn Converse. 1993. Shared mental models in expert team decision making. Individual and group decision making: Current issues 221 (1993) 221\u201346."},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"crossref","unstructured":"Wesley\u00a0P Chan Morgan Crouch Khoa Hoang Charlie Chen Nicole Robinson and Elizabeth Croft. 2025. Improving Human\u2013Robot Collaboration through Augmented Reality and Eye Gaze. ACM Transactions on Human-Robot Interaction 14 3 (2025) 1\u201319.","DOI":"10.1145\/3716175"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01355"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","DOI":"10.1145\/3706598.3714317"},{"key":"e_1_3_3_1_17_2","unstructured":"Herbert\u00a0H Clark and Susan\u00a0E Brennan. 1991. Grounding in communication. (1991)."},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","DOI":"10.1145\/1357054.1357310"},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"crossref","unstructured":"Nikunj\u00a0P Dalal and George\u00a0M Kasper. 1994. The design of joint cognitive systems: the effect of cognitive coupling on performance. International Journal of Human-Computer Studies 40 4 (1994) 677\u2013702.","DOI":"10.1006\/ijhc.1994.1031"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"crossref","unstructured":"Dominik Dellermann Philipp Ebel Matthias S\u00f6llner and Jan\u00a0Marco Leimeister. 2019. Hybrid intelligence. Business & Information Systems Engineering 61 5 (2019) 637\u2013643.","DOI":"10.1007\/s12599-019-00595-2"},{"key":"e_1_3_3_1_21_2","unstructured":"Alan Dix Janet Finlay Gregory\u00a0D Abowd and Russell Beale. 2003. Human-Computer Interaction Third. Harlow: Pearson Education (2003) 15."},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"publisher","DOI":"10.4324\/9781315087924-3"},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"crossref","unstructured":"Toma\u017e Flegar. 2025. First-Person\/System Perspective and AI Black Box. System Perspective and AI Black Box (June 09 2025) (2025).","DOI":"10.2139\/ssrn.5286210"},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01834"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.1145\/2207676.2207763"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.1145\/2875194.2875237"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"publisher","DOI":"10.1177\/154193120605000909"},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3580983"},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"crossref","unstructured":"Jean-Michel Hoc. 2001. Towards a cognitive approach to human\u2013machine cooperation in dynamic situations. International journal of human-computer studies 54 4 (2001) 509\u2013540.","DOI":"10.1006\/ijhc.2000.0454"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"crossref","unstructured":"James Hollan Edwin Hutchins and David Kirsh. 2000. Distributed cognition: toward a new foundation for human-computer interaction research. ACM Transactions on Computer-Human Interaction (TOCHI) 7 2 (2000) 174\u2013196.","DOI":"10.1145\/353485.353487"},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"publisher","DOI":"10.1109\/ROMAN.2011.6005230"},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445283"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02084"},{"key":"e_1_3_3_1_34_2","unstructured":"Yifei Huang Jilan Xu Baoqi Pei Yuping He Guo Chen Lijin Yang Xinyuan Chen Yaohui Wang Zheng Nie Jinyao Liu et\u00a0al. 2024. Vinci: A real-time embodied smart assistant based on egocentric vision-language model. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.21080 (2024)."},{"key":"e_1_3_3_1_35_2","unstructured":"Aaron Hurst Adam Lerer Adam\u00a0P Goucher Adam Perelman Aditya Ramesh Aidan Clark AJ Ostrow Akila Welihinda Alan Hayes Alec Radford et\u00a0al. 2024. Gpt-4o system card. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.21276 (2024)."},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"crossref","unstructured":"Baoxiong Jia Ting Lei Song-Chun Zhu and Siyuan Huang. 2022. Egotaskqa: Understanding human tasks in egocentric videos. Advances in Neural Information Processing Systems 35 (2022) 3343\u20133360.","DOI":"10.52202\/068431-0242"},{"key":"e_1_3_3_1_37_2","doi-asserted-by":"crossref","unstructured":"Jiun-Yin Jian Ann\u00a0M Bisantz and Colin\u00a0G Drury. 2000. Foundations for an empirically determined scale of trust in automated systems. International journal of cognitive ergonomics 4 1 (2000) 53\u201371.","DOI":"10.1207\/S15327566IJCE0401_04"},{"key":"e_1_3_3_1_38_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3641901"},{"key":"e_1_3_3_1_39_2","volume-title":"Ultralytics YOLO","author":"Jocher Glenn","year":"2023","unstructured":"Glenn Jocher, Jing Qiu, and Ayush Chaurasia. 2023. Ultralytics YOLO. https:\/\/github.com\/ultralytics\/ultralytics"},{"key":"e_1_3_3_1_40_2","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3581444"},{"key":"e_1_3_3_1_41_2","doi-asserted-by":"crossref","unstructured":"Gary Klein Paul\u00a0J Feltovich Jeffrey\u00a0M Bradshaw and David\u00a0D Woods. 2005. Common ground and coordination in joint activity. Organizational simulation 53 (2005) 139\u2013184.","DOI":"10.1002\/0471739448.ch6"},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"crossref","unstructured":"Glen Klien David\u00a0D Woods Jeffrey\u00a0M Bradshaw Robert\u00a0R Hoffman and Paul\u00a0J Feltovich. 2005. Ten challenges for making automation a\" team player\" in joint human-agent activity. IEEE Intelligent Systems 19 6 (2005) 91\u201395.","DOI":"10.1109\/MIS.2004.74"},{"key":"e_1_3_3_1_43_2","doi-asserted-by":"publisher","DOI":"10.1109\/INISTA52262.2021.9548570"},{"key":"e_1_3_3_1_44_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642230"},{"key":"e_1_3_3_1_45_2","doi-asserted-by":"publisher","DOI":"10.1145\/3706598.3714188"},{"key":"e_1_3_3_1_46_2","doi-asserted-by":"publisher","DOI":"10.1145\/3654777.3676470"},{"key":"e_1_3_3_1_47_2","doi-asserted-by":"crossref","unstructured":"John\u00a0E Mathieu Tonia\u00a0S Heffner Gerald\u00a0F Goodwin Eduardo Salas and Janis\u00a0A Cannon-Bowers. 2000. The influence of shared mental models on team process and performance. Journal of applied psychology 85 2 (2000) 273.","DOI":"10.1037\/0021-9010.85.2.273"},{"key":"e_1_3_3_1_48_2","first-page":"649","volume-title":"Field and service robotics: results of the 11th international conference","author":"Mingyue\u00a0Ma Lanssie","year":"2017","unstructured":"Lanssie Mingyue\u00a0Ma, Terrence Fong, Mark\u00a0J Micire, Yun\u00a0Kyung Kim, and Karen Feigh. 2017. Human-robot teaming: Concepts and components for design. In Field and service robotics: results of the 11th international conference. Springer, 649\u2013663."},{"key":"e_1_3_3_1_49_2","doi-asserted-by":"crossref","unstructured":"Meredith\u00a0Ringel Morris. 2025. HCI for AGI. Interactions 32 2 (2025) 26\u201332.","DOI":"10.1145\/3708815"},{"key":"e_1_3_3_1_50_2","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376565"},{"key":"e_1_3_3_1_51_2","doi-asserted-by":"crossref","unstructured":"Peter Mundy and Lisa Newell. 2007. Attention joint attention and social cognition. Current directions in psychological science 16 5 (2007) 269\u2013274.","DOI":"10.1111\/j.1467-8721.2007.00518.x"},{"key":"e_1_3_3_1_52_2","doi-asserted-by":"crossref","unstructured":"Sharon Oviatt and Philip Cohen. 2000. Perceptual user interfaces: multimodal interfaces that process what comes naturally. Commun. ACM 43 3 (2000) 45\u201353.","DOI":"10.1145\/330534.330538"},{"key":"e_1_3_3_1_53_2","doi-asserted-by":"publisher","DOI":"10.1145\/3746059.3747770"},{"key":"e_1_3_3_1_54_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW67362.2025.00278"},{"key":"e_1_3_3_1_55_2","doi-asserted-by":"crossref","unstructured":"Jeba Rezwana and Mary\u00a0Lou Maher. 2023. Designing creative AI partners with COFI: A framework for modeling interaction in human-AI co-creative systems. ACM Transactions on Computer-Human Interaction 30 5 (2023) 1\u201328.","DOI":"10.1145\/3519026"},{"key":"e_1_3_3_1_56_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642564"},{"key":"e_1_3_3_1_57_2","doi-asserted-by":"crossref","unstructured":"Beau\u00a0G Schelble Christopher Flathmann Nathan\u00a0J McNeese Guo Freeman and Rohit Mallick. 2022. Let\u2019s think together! Assessing shared mental models performance and trust in human-agent teams. Proceedings of the ACM on Human-Computer Interaction 6 GROUP (2022) 1\u201329.","DOI":"10.1145\/3492832"},{"key":"e_1_3_3_1_58_2","doi-asserted-by":"crossref","unstructured":"Philipp Schmidt and Sophie Loidolt. 2023. Interacting with machines: can an artificially intelligent agent be a partner? Philosophy & Technology 36 3 (2023) 55.","DOI":"10.1007\/s13347-023-00656-1"},{"key":"e_1_3_3_1_59_2","doi-asserted-by":"crossref","unstructured":"Isabella Seeber Eva Bittner Robert\u00a0O Briggs Triparna De\u00a0Vreede Gert-Jan De\u00a0Vreede Aaron Elkins Ronald Maier Alexander\u00a0B Merz Sarah Oeste-Rei\u00df Nils Randrup et\u00a0al. 2020. Machines as teammates: A research agenda on AI in team collaboration. Information & management 57 2 (2020) 103174.","DOI":"10.1016\/j.im.2019.103174"},{"key":"e_1_3_3_1_60_2","doi-asserted-by":"crossref","unstructured":"Yang Shi Tian Gao Xiaohan Jiao and Nan Cao. 2023. Understanding design collaboration between designers and artificial intelligence: a systematic literature review. Proceedings of the ACM on Human-Computer Interaction 7 CSCW2 (2023) 1\u201335.","DOI":"10.1145\/3610217"},{"key":"e_1_3_3_1_61_2","doi-asserted-by":"crossref","unstructured":"Ben Shneiderman. 2020. Human-centered artificial intelligence: Reliable safe & trustworthy. International Journal of Human\u2013Computer Interaction 36 6 (2020) 495\u2013504.","DOI":"10.1080\/10447318.2020.1741118"},{"key":"e_1_3_3_1_62_2","doi-asserted-by":"publisher","DOI":"10.1145\/3677386.3682095"},{"key":"e_1_3_3_1_63_2","doi-asserted-by":"crossref","unstructured":"Anna Syberfeldt Magnus Holm Oscar Danielsson Lihui Wang and Rodney\u00a0Lindgren Brewster. 2016. Support systems on the industrial shop-floors of the future\u2013operators\u2019 perspective on augmented reality. Procedia Cirp 44 (2016) 108\u2013113.","DOI":"10.1016\/j.procir.2016.02.017"},{"key":"e_1_3_3_1_64_2","doi-asserted-by":"publisher","DOI":"10.1145\/3706598.3714294"},{"key":"e_1_3_3_1_65_2","doi-asserted-by":"crossref","unstructured":"Keshara Weerasinghe Saahith Janapati Xueren Ge Sion Kim Sneha Iyer John\u00a0A Stankovic and Homa Alemzadeh. 2024. Real-Time Multimodal Cognitive Assistant for Emergency Medical Services. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.06734 (2024).","DOI":"10.1109\/IoTDI61053.2024.00012"},{"key":"e_1_3_3_1_66_2","doi-asserted-by":"crossref","unstructured":"Giles Westerfield Antonija Mitrovic and Mark Billinghurst. 2015. Intelligent augmented reality training for motherboard assembly. International Journal of Artificial Intelligence in Education 25 1 (2015) 157\u2013172.","DOI":"10.1007\/s40593-014-0032-x"},{"key":"e_1_3_3_1_67_2","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3581282"},{"key":"e_1_3_3_1_68_2","unstructured":"Jiang Xu Qiyang Miao Ziyuan Huang Yilin Lu Lingyun Sun Tianyang Yu Jingru Pei and Qichao Zhao. 2024. Intuitive interaction flow: A Dual-Loop Human-Machine Collaboration Task Allocation Model and an experimental study. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.07804 (2024)."},{"key":"e_1_3_3_1_69_2","doi-asserted-by":"publisher","DOI":"10.1109\/VR58804.2024.00108"},{"key":"e_1_3_3_1_70_2","doi-asserted-by":"publisher","DOI":"10.1145\/3706598.3713356"},{"key":"e_1_3_3_1_71_2","doi-asserted-by":"crossref","unstructured":"Hongbo Zhang Yifei Wu and Pei Chen. 2025. IEDS in Practice: A Comparative Study of an Intelli-Embodied Design Space Combining AR and GAI for Conceptual Design. Design and Artificial Intelligence (2025) 100049.","DOI":"10.1016\/j.daai.2025.100049"},{"key":"e_1_3_3_1_72_2","doi-asserted-by":"publisher","DOI":"10.1145\/3706598.3713110"},{"key":"e_1_3_3_1_73_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19778-9_39"},{"key":"e_1_3_3_1_74_2","doi-asserted-by":"publisher","DOI":"10.1145\/3643834.3660683"},{"key":"e_1_3_3_1_75_2","doi-asserted-by":"crossref","unstructured":"Yehong Zhou and Chun-Hsien Chen. 2025. Examining the Impact of Large Language Models on Design: Functions Strengths Limitations and Roles. Design and Artificial Intelligence (2025) 100017.","DOI":"10.1016\/j.daai.2025.100017"}],"event":{"name":"CHI 2026: CHI Conference on Human Factors in Computing Systems","location":"Barcelona Spain","acronym":"CHI '26","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 2026 CHI Conference on Human Factors in Computing Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3772318.3791059","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T07:42:28Z","timestamp":1776066148000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3772318.3791059"}},"subtitle":["Seeing Eye to Eye"],"short-title":[],"issued":{"date-parts":[[2026,4,13]]},"references-count":74,"alternative-id":["10.1145\/3772318.3791059","10.1145\/3772318"],"URL":"https:\/\/doi.org\/10.1145\/3772318.3791059","relation":{},"subject":[],"published":{"date-parts":[[2026,4,13]]},"assertion":[{"value":"2026-04-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}