{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,22]],"date-time":"2026-07-22T15:31:51Z","timestamp":1784734311633,"version":"3.55.0"},"publisher-location":"New York, NY, USA","reference-count":70,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,4,25]],"date-time":"2025-04-25T00:00:00Z","timestamp":1745539200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"publisher","award":["R01 LM013330"],"award-info":[{"award-number":["R01 LM013330"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,4,26]]},"DOI":"10.1145\/3706598.3714210","type":"proceedings-article","created":{"date-parts":[[2025,4,24]],"date-time":"2025-04-24T03:33:32Z","timestamp":1745465612000},"page":"1-17","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":20,"title":["Beyond Visual Perception: Insights from Smartphone Interaction of Visually Impaired Users with Large Multimodal Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2753-2360","authenticated-orcid":false,"given":"Jingyi","family":"Xie","sequence":"first","affiliation":[{"name":"College of Information Sciences and Technology, Pennsylvania State University, University Park, Pennsylvania, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0946-6769","authenticated-orcid":false,"given":"Rui","family":"Yu","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, University of Louisville, Louisville, Kentucky, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8169-1653","authenticated-orcid":false,"given":"He","family":"Zhang","sequence":"additional","affiliation":[{"name":"College of Information Sciences and Technology, Pennsylvania State University, State College, Pennsylvania, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5063-3808","authenticated-orcid":false,"given":"Syed Masum","family":"Billah","sequence":"additional","affiliation":[{"name":"College of Information Sciences and Technology, Pennsylvania State University, University Park, Pennsylvania, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4971-2004","authenticated-orcid":false,"given":"Sooyeon","family":"Lee","sequence":"additional","affiliation":[{"name":"Ying Wu College of Computing, New Jersey Institute of Technology, Newark, New Jersey, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5189-337X","authenticated-orcid":false,"given":"John M.","family":"Carroll","sequence":"additional","affiliation":[{"name":"College of Information Sciences and Technology, Pennsylvania State University, University Park, Pennsylvania, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,4,25]]},"reference":[{"key":"e_1_3_3_3_2_2","unstructured":"2024. Aira. https:\/\/aira.io\/."},{"key":"e_1_3_3_3_3_2","unstructured":"2024. Announcing \u201cBe My AI \u201d Soon Available for Hundreds of Thousands of Be My Eyes Users. Retrieved September 1 2024 from https:\/\/www.bemyeyes.com\/blog\/announcing-be-my-ai"},{"key":"e_1_3_3_3_4_2","unstructured":"2024. Be My Eyes - See the world together. https:\/\/www.bemyeyes.com\/."},{"key":"e_1_3_3_3_5_2","unstructured":"Josh Achiam Steven Adler Sandhini Agarwal Lama Ahmad Ilge Akkaya Florencia\u00a0Leoni Aleman Diogo Almeida Janko Altenschmidt Sam Altman Shyamal Anadkat et\u00a0al. 2023. GPT-4 Technical Report. arXiv:https:\/\/arXiv.org\/abs\/2303.08774 (2023)."},{"key":"e_1_3_3_3_6_2","unstructured":"Rudaiba Adnin and Maitraye Das. 2024. \u201cI look at it as the king of knowledge\u201d: How Blind People Use and Understand Generative AI Tools. people 16 54 (2024) 92."},{"key":"e_1_3_3_3_7_2","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376143"},{"key":"e_1_3_3_3_8_2","unstructured":"Yuntao Bai Andy Jones Kamal Ndousse Amanda Askell Anna Chen Nova DasSarma Dawn Drain Stanislav Fort Deep Ganguli Tom Henighan et\u00a0al. 2022. Training a helpful and harmless assistant with reinforcement learning from human feedback. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2204.05862 (2022)."},{"key":"e_1_3_3_3_9_2","doi-asserted-by":"publisher","DOI":"10.1109\/HSI.2015.7170662"},{"key":"e_1_3_3_3_10_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaaiss.v3i1.31232"},{"key":"e_1_3_3_3_11_2","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445498"},{"key":"e_1_3_3_3_12_2","doi-asserted-by":"publisher","DOI":"10.1145\/1866029.1866080"},{"key":"e_1_3_3_3_13_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2010.5543821"},{"key":"e_1_3_3_3_14_2","doi-asserted-by":"crossref","unstructured":"Virginia Braun and Victoria Clarke. 2006. Using thematic analysis in psychology. Qualitative research in psychology 3 2 (2006) 77\u2013101.","DOI":"10.1191\/1478088706qp063oa"},{"key":"e_1_3_3_3_15_2","doi-asserted-by":"publisher","DOI":"10.1109\/HSI.2008.4581561"},{"key":"e_1_3_3_3_16_2","doi-asserted-by":"publisher","DOI":"10.1145\/2384916.2384941"},{"key":"e_1_3_3_3_17_2","unstructured":"Ben Caldwell Michael Cooper Loretta\u00a0Guarino Reid Gregg Vanderheiden Wendy Chisholm John Slatin and Jason White. 2008. Web content accessibility guidelines (WCAG) 2.0. WWW Consortium (W3C) 290 1-34 (2008) 5\u201312."},{"key":"e_1_3_3_3_18_2","doi-asserted-by":"crossref","unstructured":"John\u00a0M. Carroll Sooyeon Lee Madison Reddie Jordan Beck and Mary\u00a0Beth Rosson. 2020. Human-Computer Synergies in Prosthetic Interactions. IxD&A 44 (2020) 29\u201352. http:\/\/www.mifav.uniroma2.it\/inevent\/events\/idea2010\/doc\/44_2.pdf","DOI":"10.55612\/s-5002-044-002"},{"key":"e_1_3_3_3_19_2","doi-asserted-by":"publisher","unstructured":"Paul Chandler and John Sweller. 1991. Cognitive load theory and the format of instruction. Cognition and instruction 8 4 (1991) 293\u2013332. 10.1207\/s1532690xci0804_2","DOI":"10.1207\/s1532690xci0804_2"},{"key":"e_1_3_3_3_20_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-49655-9_2"},{"key":"e_1_3_3_3_21_2","doi-asserted-by":"publisher","DOI":"10.22318\/icls2024.594746"},{"key":"e_1_3_3_3_22_2","doi-asserted-by":"crossref","unstructured":"Vanja Garaj Rommanee Jirawimut Piotr Ptasinski Franjo Cecelja and Wamadeva Balachandran. 2003. A system for remote sighted guidance of visually impaired pedestrians. British Journal of Visual Impairment 21 2 (2003) 55\u201363.","DOI":"10.1177\/026461960302100204"},{"key":"e_1_3_3_3_23_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642211"},{"key":"e_1_3_3_3_24_2","doi-asserted-by":"crossref","unstructured":"Christina Granquist Susan\u00a0Y Sun Sandra\u00a0R Montezuma Tu\u00a0M Tran Rachel Gage and Gordon\u00a0E Legge. 2021. Evaluation and comparison of artificial intelligence vision aids: Orcam myeye 1 and seeing ai. Journal of Visual Impairment & Blindness 115 4 (2021) 277\u2013285.","DOI":"10.1177\/0145482X211027492"},{"key":"e_1_3_3_3_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00380"},{"key":"e_1_3_3_3_26_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58520-4_25"},{"key":"e_1_3_3_3_27_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642817"},{"key":"e_1_3_3_3_28_2","doi-asserted-by":"publisher","DOI":"10.1145\/3461702.3462620"},{"key":"e_1_3_3_3_29_2","doi-asserted-by":"crossref","unstructured":"Nicole Holmes and Kelly Prentice. 2015. iPhone video link facetime as an orientation tool: remote O&M for people with vision impairment. International Journal of Orientation & Mobility 7 1 (2015) 60\u201368.","DOI":"10.21307\/ijom-2017-057"},{"key":"e_1_3_3_3_30_2","doi-asserted-by":"publisher","DOI":"10.1145\/3517428.3544824"},{"key":"e_1_3_3_3_31_2","doi-asserted-by":"crossref","unstructured":"Ziad Hunaiti Vanja Garaj and Wamadeva Balachandran. 2006. A remote vision guidance system for visually impaired pedestrians. The Journal of Navigation 59 3 (2006) 497\u2013504.","DOI":"10.1017\/S0373463306003894"},{"key":"e_1_3_3_3_32_2","first-page":"1","volume-title":"Proceedings of the CHI Conference on Human Factors in Computing Systems","author":"Jiang Lucy","year":"2024","unstructured":"Lucy Jiang, Crescentia Jung, Mahika Phutane, Abigale Stangl, and Shiri Azenkot. 2024. \u201cIt\u2019s Kind of Context Dependent\u201d: Understanding Blind and Low Vision People\u2019s Video Accessibility Preferences Across Viewing Scenarios. In Proceedings of the CHI Conference on Human Factors in Computing Systems. 1\u201320."},{"key":"e_1_3_3_3_33_2","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376823"},{"key":"e_1_3_3_3_34_2","unstructured":"Elizabeth Kupferstein Yuhang Zhao Shiri Azenkot and Hathaitorn Rojnirun. 2020. Understanding the use of artificial intelligence based visual aids for people with visual impairments. Investigative Ophthalmology & Visual Science 61 7 (2020) 932\u2013932."},{"key":"e_1_3_3_3_35_2","doi-asserted-by":"publisher","DOI":"10.1145\/2047196.2047200"},{"key":"e_1_3_3_3_36_2","doi-asserted-by":"publisher","DOI":"10.1145\/2513383.2517033"},{"key":"e_1_3_3_3_37_2","unstructured":"Sooyeon Lee Madison Reddie Krish Gurdasani Xiying Wang Jordan Beck Mary\u00a0Beth Rosson and John\u00a0M. Carroll. 2018. Conversations for Vision: Remote Sighted Assistants Helping People with Visual Impairments. arxiv:https:\/\/arXiv.org\/abs\/1812.00148\u00a0[cs.HC]"},{"key":"e_1_3_3_3_38_2","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376591"},{"key":"e_1_3_3_3_39_2","doi-asserted-by":"publisher","DOI":"10.1145\/3490099.3511113"},{"key":"e_1_3_3_3_40_2","volume-title":"Advances in Neural Information Processing Systems","author":"Lewis Patrick","year":"2020","unstructured":"Patrick Lewis, Ethan Perez, Aleksandara Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich K\u00fcttler, Mike Lewis, Wen-tau Yih, Tim Rockt\u00e4schel, et\u00a0al. 2020. Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks. In Advances in Neural Information Processing Systems."},{"key":"e_1_3_3_3_41_2","first-page":"17","volume-title":"2024 CHI Conference on Human Factors in Computing Sytems, CHI 2024","author":"Liu Yiren","year":"2024","unstructured":"Yiren Liu, Si Chen, Haocong Cheng, Mengxia Yu, Xiao Ran, Andrew Mo, Yiliu Tang, and Yun Huang. 2024. CoQuest: Exploring research question co-creation with an LLM-based agent. In 2024 CHI Conference on Human Factors in Computing Sytems, CHI 2024. Association for Computing Machinery, 17."},{"key":"e_1_3_3_3_42_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642939"},{"key":"e_1_3_3_3_43_2","unstructured":"Microsoft. 2022. Seeing AI - Talking camera app for those with a visual impairment. https:\/\/www.microsoft.com\/en-us\/ai\/seeing-ai."},{"key":"e_1_3_3_3_44_2","doi-asserted-by":"publisher","DOI":"10.1145\/3597638.3608395"},{"key":"e_1_3_3_3_45_2","first-page":"7076","volume-title":"International Conference on Machine Learning","author":"Mozannar Hussein","year":"2020","unstructured":"Hussein Mozannar and David Sontag. 2020. Consistent estimators for learning to defer to an expert. In International Conference on Machine Learning. PMLR, 7076\u20137087."},{"key":"e_1_3_3_3_46_2","doi-asserted-by":"crossref","unstructured":"Mukhriddin Mukhiddinov Akmalbek\u00a0Bobomirzaevich Abdusalomov and Jinsoo Cho. 2022. Automatic fire detection and notification system based on improved YOLOv4 for the blind and visually impaired. Sensors 22 9 (2022) 3307.","DOI":"10.3390\/s22093307"},{"key":"e_1_3_3_3_47_2","doi-asserted-by":"crossref","unstructured":"Deirdre\u00a0K Mulligan and Helen Nissenbaum. 2020. The concept of handoff as a model for ethical analysis and design. The Oxford handbook of ethics of AI 1 1 (2020) 233.","DOI":"10.1093\/oxfordhb\/9780190067397.013.15"},{"key":"e_1_3_3_3_48_2","doi-asserted-by":"publisher","DOI":"10.1145\/3663548.3675617"},{"key":"e_1_3_3_3_49_2","doi-asserted-by":"crossref","unstructured":"Ethan Perez Saffron Huang Francis Song Trevor Cai Roman Ring John Aslanides Amelia Glaese Nat McAleese and Geoffrey Irving. 2022. Red teaming language models with language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2202.03286 (2022).","DOI":"10.18653\/v1\/2022.emnlp-main.225"},{"key":"e_1_3_3_3_50_2","doi-asserted-by":"publisher","unstructured":"Ishaani Priyadarshini Rohit Sharma Dhowmya Bhatt and M Al-Numay. 2023. Human activity recognition in cyber-physical systems using optimized machine learning techniques. Cluster Computing 26 4 (2023) 2199\u20132215. 10.1007\/s10586-022-03662-8","DOI":"10.1007\/s10586-022-03662-8"},{"key":"e_1_3_3_3_51_2","unstructured":"Maithra Raghu Katy Blumer Greg Corrado Jon Kleinberg Ziad Obermeyer and Sendhil Mullainathan. 2019. The algorithmic automation problem: Prediction triage and human effort. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1903.12220 (2019)."},{"key":"e_1_3_3_3_52_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00918"},{"key":"e_1_3_3_3_53_2","doi-asserted-by":"publisher","DOI":"10.1109\/MED.2014.6961320"},{"key":"e_1_3_3_3_54_2","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376404"},{"key":"e_1_3_3_3_55_2","doi-asserted-by":"publisher","DOI":"10.1145\/3441852.3471233"},{"key":"e_1_3_3_3_56_2","doi-asserted-by":"publisher","unstructured":"John Sweller. 1988. Cognitive load during problem solving: Effects on learning. Cognitive science 12 2 (1988) 257\u2013285. 10.1016\/0364-0213(88)90023-7","DOI":"10.1016\/0364-0213(88)90023-7"},{"key":"e_1_3_3_3_57_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00135"},{"key":"e_1_3_3_3_58_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00541"},{"key":"e_1_3_3_3_59_2","unstructured":"Xuezhi Wang Jason Wei Dale Schuurmans Quoc Le Ed Chi Sharan Narang Aakanksha Chowdhery and Denny Zhou. 2022. Self-consistency improves chain of thought reasoning in language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2203.11171 (2022)."},{"key":"e_1_3_3_3_60_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00894"},{"key":"e_1_3_3_3_61_2","unstructured":"Jason Wei Xuezhi Wang Dale Schuurmans Maarten Bosma Fei Xia Ed Chi Quoc\u00a0V Le Denny Zhou et\u00a0al. 2022. Chain-of-thought prompting elicits reasoning in large language models. Advances in neural information processing systems 35 (2022) 24824\u201324837."},{"key":"e_1_3_3_3_62_2","doi-asserted-by":"crossref","unstructured":"Jingyi Xie Madison Reddie Sooyeon Lee Syed\u00a0Masum Billah Zihan Zhou Chun-hua Tsai and John\u00a0M Carroll. 2022. Iterative Design and Prototyping of Computer Vision Mediated Remote Sighted Assistance. ACM Transactions on Computer-Human Interaction (TOCHI) 29 4 (2022) 1\u201340.","DOI":"10.1145\/3501298"},{"key":"e_1_3_3_3_63_2","doi-asserted-by":"publisher","DOI":"10.1145\/3563657.3596019"},{"key":"e_1_3_3_3_64_2","doi-asserted-by":"publisher","DOI":"10.1145\/3532106.3533560"},{"key":"e_1_3_3_3_65_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642030"},{"key":"e_1_3_3_3_66_2","doi-asserted-by":"crossref","unstructured":"Bufang Yang Lixing He Kaiwei Liu and Zhenyu Yan. 2024. VIAssist: Adapting Multi-modal Large Language Models for Users with Visual Impairments. arXiv:https:\/\/arXiv.org\/abs\/2404.02508 (2024).","DOI":"10.1109\/FMSys62467.2024.00010"},{"key":"e_1_3_3_3_67_2","doi-asserted-by":"crossref","unstructured":"Rui Yu Sooyeon Lee Jingyi Xie Syed\u00a0Masum Billah and John\u00a0M Carroll. 2024. Human\u2013AI Collaboration for Remote Sighted Assistance: Perspectives from the LLM Era. Future Internet 16 7 (2024) 254.","DOI":"10.3390\/fi16070254"},{"key":"e_1_3_3_3_68_2","unstructured":"Weihao Yu Zhengyuan Yang Linjie Li Jianfeng Wang Kevin Lin Zicheng Liu Xinchao Wang and Lijuan Wang. 2023. Mm-vet: Evaluating large multimodal models for integrated capabilities. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2308.02490 (2023)."},{"key":"e_1_3_3_3_69_2","doi-asserted-by":"publisher","DOI":"10.1145\/3688828.3699636"},{"key":"e_1_3_3_3_70_2","unstructured":"Yi Zhao Yilin Zhang Rong Xiang Jing Li and Hillming Li. 2024. VIALM: A Survey and Benchmark of Visually Impaired Assistance with Large Models. arXiv:https:\/\/arXiv.org\/abs\/2402.01735 (2024)."},{"key":"e_1_3_3_3_71_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i17.29946"}],"event":{"name":"CHI 2025: CHI Conference on Human Factors in Computing Systems","location":"Yokohama Japan","acronym":"CHI '25","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 2025 CHI Conference on Human Factors in Computing Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3706598.3714210","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3706598.3714210","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,4]],"date-time":"2025-07-04T05:09:31Z","timestamp":1751605771000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3706598.3714210"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,25]]},"references-count":70,"alternative-id":["10.1145\/3706598.3714210","10.1145\/3706598"],"URL":"https:\/\/doi.org\/10.1145\/3706598.3714210","relation":{},"subject":[],"published":{"date-parts":[[2025,4,25]]},"assertion":[{"value":"2025-04-25","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}