{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T19:53:08Z","timestamp":1776109988216,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":82,"publisher":"ACM","funder":[{"name":"National Science Foundation","award":["IIS-2328182"],"award-info":[{"award-number":["IIS-2328182"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,9,28]]},"DOI":"10.1145\/3746059.3747641","type":"proceedings-article","created":{"date-parts":[[2025,9,27]],"date-time":"2025-09-27T07:44:49Z","timestamp":1758959089000},"page":"1-17","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["VRSight: An AI-Driven Scene Description System to Improve Virtual Reality Accessibility for Blind People"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-2623-0528","authenticated-orcid":false,"given":"Daniel","family":"Killough","sequence":"first","affiliation":[{"name":"Department of Computer Sciences, University of Wisconsin-Madison, Madison, Wisconsin, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-9685-8238","authenticated-orcid":false,"given":"Justin","family":"Feng","sequence":"additional","affiliation":[{"name":"Department of Computer Sciences, University of Wisconsin-Madison, Madison, Wisconsin, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-4667-6094","authenticated-orcid":false,"given":"Zheng Xue","family":"Ching","sequence":"additional","affiliation":[{"name":"Department of Computer Sciences, University of Wisconsin-Madison, Madison, Wisconsin, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-5844-5171","authenticated-orcid":false,"given":"Daniel","family":"Wang","sequence":"additional","affiliation":[{"name":"Department of Computer Sciences, University of Wisconsin-Madison, Madison, Wisconsin, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-1261-751X","authenticated-orcid":false,"given":"Rithvik","family":"Dyava","sequence":"additional","affiliation":[{"name":"Department of Computer Sciences, University of Wisconsin-Madison, Madison, Wisconsin, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1423-4513","authenticated-orcid":false,"given":"Yapeng","family":"Tian","sequence":"additional","affiliation":[{"name":"Department of Computer Science, The University of Texas at Dallas, Richardson, Texas, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3686-695X","authenticated-orcid":false,"given":"Yuhang","family":"Zhao","sequence":"additional","affiliation":[{"name":"Department of Computer Sciences, University of Wisconsin-Madison, Madison, Wisconsin, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,9,27]]},"reference":[{"key":"e_1_3_3_2_2_2","volume-title":"Create Augmented Images","year":"2023","unstructured":"Roboflow 2023. Create Augmented Images. Roboflow. https:\/\/docs.roboflow.com\/datasets\/image-augmentation"},{"key":"e_1_3_3_2_3_2","volume-title":"Learn about accessibility features for Meta Quest","year":"2024","unstructured":"Meta 2024. Learn about accessibility features for Meta Quest. Meta. https:\/\/www.meta.com\/help\/quest\/articles\/in-vr-experiences\/oculus-features\/accessibility-features\/"},{"key":"e_1_3_3_2_4_2","volume-title":"Roboflow: Build and Deploy Computer Vision Applications","year":"2025","unstructured":"Roboflow 2025. Roboflow: Build and Deploy Computer Vision Applications. Roboflow. https:\/\/roboflow.com\/ Used by over 1 million engineers to create datasets, train models, and deploy to production.."},{"key":"e_1_3_3_2_5_2","doi-asserted-by":"crossref","unstructured":"Amandine Afonso-Jaco and Brian\u00a0FG Katz. 2022. Spatial knowledge via auditory information for blind individuals: Spatial cognition studies and the use of audio-VR. Sensors 22 13 (2022) 4794.","DOI":"10.3390\/s22134794"},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"publisher","DOI":"10.1145\/3544549.3583909"},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"crossref","unstructured":"Oluleke Bamodu and Xu\u00a0Ming Ye. 2013. Virtual reality and virtual reality system components. Advanced materials research 765 (2013) 1169\u20131172.","DOI":"10.4028\/www.scientific.net\/AMR.765-767.1169"},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"publisher","DOI":"10.1145\/1296843.1296884"},{"key":"e_1_3_3_2_9_2","unstructured":"Hansj\u00f6rg Bittner. 2012. Audio description guidelines: A comparison. New perspectives in translation 20 (2012) 41\u201361."},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"crossref","unstructured":"Carmen\u00a0J Branje and Deborah\u00a0I Fels. 2012. Livedescribe: can amateur describers create high-quality audio description? Journal of Visual Impairment & Blindness 106 3 (2012) 154\u2013165.","DOI":"10.1177\/0145482X1210600304"},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"publisher","DOI":"10.1145\/3526113.3545613"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"crossref","unstructured":"Gregory\u00a0D Clemenson Antonella Maselli Alexander\u00a0J Fiannaca Amos Miller and Mar Gonzalez-Franco. 2021. Rethinking GPS navigation: creating cognitive maps through auditory clues. Scientific reports 11 1 (2021) 7764.","DOI":"10.1038\/s41598-021-87148-4"},{"key":"e_1_3_3_2_13_2","first-page":"45","volume-title":"International Conference on Computers Helping People with Special Needs (ICCHP)","author":"Coughlan James","year":"2020","unstructured":"James Coughlan, Joseph Miele, and Natalia Guerrero. 2020. Simple Directional Prompts Improve Audio Navigation for Blind Users in Spatial Interfaces. In International Conference on Computers Helping People with Special Needs (ICCHP). Springer, 45\u201357."},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"publisher","DOI":"10.1145\/3607822.3614540"},{"key":"e_1_3_3_2_15_2","unstructured":"Mr\u00a0Ninad\u00a0Janardan Dani. 2019. Impact of virtual reality on gaming. Virtual Reality 6 12 (2019) 2033\u20132036."},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"publisher","DOI":"10.1109\/SBGames.2015.26"},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"publisher","DOI":"10.1145\/3491101.3519692"},{"key":"e_1_3_3_2_18_2","unstructured":"Be\u00a0My Eyes. 2023 (accessed May 2023). Be My Eyes. https:\/\/www.bemyeyes.com."},{"key":"e_1_3_3_2_19_2","unstructured":"Freedom Scientific. 2024. JAWS: Job Access With Speech. https:\/\/www.freedomscientific.com\/Products\/software\/JAWS. Commercial screen reader for Windows."},{"key":"e_1_3_3_2_20_2","unstructured":"Nazaret Fresno Judit Castell\u00e0\u00a0Mate and Olga Soler\u00a0Vilageliu. 2014. Less is more. Effects of the amount of information and its presentation in the recall and reception of audio described characters. International Journal of Sciences: Basic and Applied Research (IJSBAR) (2014)."},{"key":"e_1_3_3_2_21_2","volume-title":"Google Earth VR","author":"LLC Google","year":"2016","unstructured":"Google LLC. 2016. Google Earth VR. https:\/\/www.meta.com\/experiences\/pcvr\/google-earth-vr\/1513995308673845\/"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"publisher","DOI":"10.1145\/1054972.1055012"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"publisher","unstructured":"Kotaro Hara Shiri Azenkot Megan Campbell Cynthia\u00a0L Bennett Vicki Le Sean Pannella Robert Moore Kelly Minckler Rochelle\u00a0H Ng and Jon\u00a0E Froehlich. 2015. Improving public transit accessibility for blind riders by crowdsourcing bus stop landmark locations with google street view: An extended analysis. ACM Transactions on Accessible Computing (TACCESS) 6 2 (2015) 1\u201323. 10.1145\/2717513","DOI":"10.1145\/2717513"},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"publisher","DOI":"10.1145\/3373625.3417006"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"publisher","DOI":"10.1145\/1182475.1182492"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"crossref","unstructured":"Elliot Hu-Au and Joey\u00a0J Lee. 2017. Virtual reality in education: a tool for learning in the experience age. International Journal of Innovation in Education 4 4 (2017) 215\u2013226.","DOI":"10.1504\/IJIIE.2017.091481"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3581085"},{"key":"e_1_3_3_2_28_2","unstructured":"The Smith-Kettlewell Eye\u00a0Research Institute. 2019. YouDescribe.com. https:\/\/youdescribe.org\/."},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"publisher","DOI":"10.1145\/3586183.3606830"},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"crossref","unstructured":"Gunnar Jansson Helen Petrie Chetz Colwell Diana Kornbrot J F\u00e4nger H K\u00f6nig Katarina Billberger Andrew Hardwick and Stephen Furner. 1999. Haptic Virtual Environments for Blind People: Exploratory Experiments With Two Devices. International Journal of Virtual Reality 4 1 (1999) 8\u201317.","DOI":"10.20870\/IJVR.1999.4.1.2663"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"publisher","DOI":"10.1145\/3517428.3544821"},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"publisher","DOI":"10.1145\/3597638.3608381"},{"key":"e_1_3_3_2_33_2","volume-title":"Ultralytics YOLOv8","author":"Jocher Glenn","year":"2023","unstructured":"Glenn Jocher, Ayush Chaurasia, and Jing Qiu. 2023. Ultralytics YOLOv8. https:\/\/github.com\/ultralytics\/ultralytics"},{"key":"e_1_3_3_2_34_2","doi-asserted-by":"publisher","DOI":"10.1145\/3663548.3675663"},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"publisher","unstructured":"Sam Kavanagh Andrew Luxton-Reilly Burkhard Wuensche and Beryl Plimmer. 2017. A Systematic Review of Virtual Reality in Education. Themes in Science and Technology Education 10 2 (2017) 85\u2013119. 10.1109\/ICWT47785.2019.8978263","DOI":"10.1109\/ICWT47785.2019.8978263"},{"key":"e_1_3_3_2_36_2","unstructured":"Daniel Killough Tiger\u00a0F Ji Kexin Zhang Yaxin Hu Yu Huang Ruofei Du and Yuhang Zhao. 2024. XR for All: Understanding Developer Perspectives on Accessibility Integration in Extended Reality. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.16321 (2024)."},{"key":"e_1_3_3_2_37_2","doi-asserted-by":"publisher","DOI":"10.1145\/3597638.3608425"},{"key":"e_1_3_3_2_38_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"e_1_3_3_2_39_2","doi-asserted-by":"publisher","unstructured":"T. Lokki and M. Grohn. 2005. Navigation With Auditory Cues in a Virtual Environment. IEEE MultiMedia 12 2 (2005) 80\u201386. 10.1109\/MMUL.2005.33","DOI":"10.1109\/MMUL.2005.33"},{"key":"e_1_3_3_2_40_2","doi-asserted-by":"publisher","DOI":"10.1145\/3410404.3414266"},{"key":"e_1_3_3_2_41_2","doi-asserted-by":"publisher","DOI":"10.31782\/IJCRR.2020.122032"},{"key":"e_1_3_3_2_42_2","doi-asserted-by":"crossref","unstructured":"Fabrizia Mantovani Gianluca Castelnuovo Andrea Gaggioli and Giuseppe Riva. 2003. Virtual reality training for health-care professionals. CyberPsychology & Behavior 6 4 (2003) 389\u2013395.","DOI":"10.1089\/109493103322278772"},{"key":"e_1_3_3_2_43_2","volume-title":"Shop Meta Quest VR Games, Apps, Deals and More","year":"2025","unstructured":"Meta. 2025. Shop Meta Quest VR Games, Apps, Deals and More. Meta Platforms, Inc. https:\/\/www.meta.com\/experiences\/"},{"key":"e_1_3_3_2_44_2","doi-asserted-by":"publisher","DOI":"10.1145\/3472749.3474768"},{"key":"e_1_3_3_2_45_2","doi-asserted-by":"publisher","DOI":"10.1145\/3517428.3544802"},{"key":"e_1_3_3_2_46_2","doi-asserted-by":"publisher","DOI":"10.1145\/3663548.3675617"},{"key":"e_1_3_3_2_47_2","doi-asserted-by":"crossref","unstructured":"Michael\u00a0A Nees and Eliana Liebman. 2023. Auditory icons earcons spearcons and speech: A systematic review and meta-analysis of brief audio alerts in human-machine interfaces. Auditory Perception & Cognition 6 3-4 (2023) 300\u2013329.","DOI":"10.1080\/25742442.2023.2219201"},{"key":"e_1_3_3_2_48_2","doi-asserted-by":"crossref","unstructured":"Baruch Nevo. 1985. Face validity revisited. Journal of educational measurement 22 4 (1985) 287\u2013293.","DOI":"10.1111\/j.1745-3984.1985.tb01065.x"},{"key":"e_1_3_3_2_49_2","doi-asserted-by":"publisher","DOI":"10.1145\/345513.345282"},{"key":"e_1_3_3_2_50_2","doi-asserted-by":"publisher","DOI":"10.1145\/3596671.3598578"},{"key":"e_1_3_3_2_51_2","unstructured":"NV Access. 2024. NVDA: NonVisual Desktop Access. https:\/\/www.nvaccess.org. Screen reader for Windows."},{"key":"e_1_3_3_2_52_2","unstructured":"American\u00a0Council of\u00a0the Blind. 2003. The Audio Description Project. https:\/\/adp.acb.org\/guidelines.html."},{"key":"e_1_3_3_2_53_2","doi-asserted-by":"publisher","DOI":"10.1145\/3379337.3415864"},{"key":"e_1_3_3_2_54_2","doi-asserted-by":"crossref","unstructured":"Lorenzo Picinali Amandine Afonso Michel Denis and Brian\u00a0FG Katz. 2014. Exploration of architectural spaces by blind people using auditory virtual reality for the construction of spatial knowledge. International Journal of Human-Computer Studies 72 4 (2014) 393\u2013407.","DOI":"10.1016\/j.ijhcs.2013.12.008"},{"key":"e_1_3_3_2_55_2","unstructured":"The Audio\u00a0Description Project. 2019. adp.acb.org. https:\/\/adp.acb.org\/guidelines.html."},{"key":"e_1_3_3_2_56_2","unstructured":"Renato\u00a0Alexandre Ribeiro In\u00eas Gon\u00e7alves Manuel Pi\u00e7arra Let\u00edcia\u00a0Seixas Pereira Carlos Duarte Andr\u00e9 Rodrigues and Jo\u00e3o Guerreiro. 2024. Investigating Virtual Reality Locomotion Techniques with Blind People. (2024)."},{"key":"e_1_3_3_2_57_2","doi-asserted-by":"crossref","unstructured":"Tomoya Sato Yusuke Sugano and Yoichi Sato. 2025. Audio-visual localization based on spatial relative sound order. Machine Vision and Applications 36 4 (2025) 90.","DOI":"10.1007\/s00138-025-01700-0"},{"key":"e_1_3_3_2_58_2","doi-asserted-by":"publisher","DOI":"10.1109\/HAPTIC.2010.5444631"},{"key":"e_1_3_3_2_59_2","doi-asserted-by":"crossref","unstructured":"Thomas Schubert Frank Friedmann and Holger Regenbrecht. 2001. The experience of presence: Factor analytic insights. Presence: Teleoperators & Virtual Environments 10 3 (2001) 266\u2013281. Original IPQ work..","DOI":"10.1162\/105474601300343603"},{"key":"e_1_3_3_2_60_2","unstructured":"Thomas Schubert Holger Regenbrecht and Frank Friedmann. 2025. Igroup Presence Questionnaire (IPQ) Overview. https:\/\/www.igroup.org\/pq\/ipq\/index.php Accessed 2025."},{"key":"e_1_3_3_2_61_2","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376353"},{"key":"e_1_3_3_2_62_2","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3174090"},{"key":"e_1_3_3_2_63_2","doi-asserted-by":"publisher","DOI":"10.1016\/j.ics.2005.05.215"},{"key":"e_1_3_3_2_64_2","unstructured":"Pixar\u00a0Animation Studios. 2004 (accessed August 2022). The Incredibles: Am I Fired Scene with Audio Description. https:\/\/www.youtube.com\/watch?v=2zhzVGmyjtg."},{"key":"e_1_3_3_2_65_2","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300243"},{"key":"e_1_3_3_2_66_2","doi-asserted-by":"publisher","DOI":"10.1145\/354324.354371"},{"key":"e_1_3_3_2_67_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCE.2018.8326167"},{"key":"e_1_3_3_2_68_2","doi-asserted-by":"crossref","unstructured":"Shari Trewin Mark Laff Vicki Hanson and Anna Cavender. 2009. Exploring visual and motor accessibility in navigating a virtual world. ACM Transactions on Accessible Computing (TACCESS) 2 2 (2009) 11.","DOI":"10.1145\/1530064.1530069"},{"key":"e_1_3_3_2_69_2","doi-asserted-by":"crossref","unstructured":"Dimitrios Tzovaras Konstantinos Moustakas Georgios Nikolakis and Michael\u00a0G Strintzis. 2009. Interactive Mixed Reality White Cane Simulation for the Training of the Blind and the Visually Impaired. Personal and Ubiquitous Computing 13 1 (2009) 51\u201358.","DOI":"10.1007\/s00779-007-0171-2"},{"key":"e_1_3_3_2_70_2","doi-asserted-by":"publisher","DOI":"10.1145\/638249.638259"},{"key":"e_1_3_3_2_71_2","volume-title":"Ultralytics YOLOv8 Performance Metrics","year":"2024","unstructured":"Ultralytics. 2024. Ultralytics YOLOv8 Performance Metrics. Ultralytics. https:\/\/docs.ultralytics.com\/models\/yolov8\/#performance-metrics"},{"key":"e_1_3_3_2_72_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642839"},{"key":"e_1_3_3_2_73_2","doi-asserted-by":"crossref","unstructured":"Bruce\u00a0N. Walker and Jeffrey Lindsay. 2006. Navigation Performance With a Virtual Auditory Display: Effects of Beacon Sound Capture Radius and Practice. Human Factors 48 2 (2006) 265\u2013278.","DOI":"10.1518\/001872006777724507"},{"key":"e_1_3_3_2_74_2","volume-title":"Proceedings of the 2001 International Conference on Auditory Display","author":"Waters Dean\u00a0A.","year":"2001","unstructured":"Dean\u00a0A. Waters and Husam\u00a0H. Abulula. 2001. The Virtual Bat: Echolocation in Virtual Reality. In Proceedings of the 2001 International Conference on Auditory Display. Espoo, Finland, 6\u00a0pages."},{"key":"e_1_3_3_2_75_2","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300371"},{"key":"e_1_3_3_2_76_2","unstructured":"Alice Wong Hannah Gillis and Ben Peck. 2018. VR Accessibility Survey: Survey for People with Disabilities. https:\/\/drive.google.com\/file\/d\/0B0VwTVwReMqLMFIzdzVVaVdaTFk\/view. Last accessed 21 December 2018."},{"key":"e_1_3_3_2_77_2","unstructured":"Lihe Yang Bingyi Kang Zilong Huang Zhen Zhao Xiaogang Xu Jiashi Feng and Hengshuang Zhao. 2024. Depth Anything V2. arXiv:https:\/\/arXiv.org\/abs\/2406.09414 (2024)."},{"key":"e_1_3_3_2_78_2","doi-asserted-by":"publisher","DOI":"10.1145\/3597638.3608388"},{"key":"e_1_3_3_2_79_2","doi-asserted-by":"publisher","DOI":"10.1145\/3025453.3025846"},{"key":"e_1_3_3_2_80_2","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3173690"},{"key":"e_1_3_3_2_81_2","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300341"},{"key":"e_1_3_3_2_82_2","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3501884"},{"key":"e_1_3_3_2_83_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642582"}],"event":{"name":"UIST '25: The 38th Annual ACM Symposium on User Interface Software and Technology","location":"Busan Republic of Korea","acronym":"UIST '25","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction","SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"]},"container-title":["Proceedings of the 38th Annual ACM Symposium on User Interface Software and Technology"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746059.3747641","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,27]],"date-time":"2025-09-27T22:00:08Z","timestamp":1759010408000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746059.3747641"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,27]]},"references-count":82,"alternative-id":["10.1145\/3746059.3747641","10.1145\/3746059"],"URL":"https:\/\/doi.org\/10.1145\/3746059.3747641","relation":{},"subject":[],"published":{"date-parts":[[2025,9,27]]},"assertion":[{"value":"2025-09-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}