{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T20:59:57Z","timestamp":1770411597618,"version":"3.49.0"},"publisher-location":"Singapore","reference-count":29,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819569625","type":"print"},{"value":"9789819569632","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-6963-2_12","type":"book-chapter","created":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T10:08:38Z","timestamp":1770372518000},"page":"128-135","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Ask VR: Vision Language Model Driven Scene Descriptor for\u00a0Blind and\u00a0Low Vision Users in\u00a0VR Environment"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9774-3879","authenticated-orcid":false,"given":"Jaime B.","family":"Fernandez","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8045-3514","authenticated-orcid":false,"given":"Ali Akbar Shah","family":"Syed","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0674-2131","authenticated-orcid":false,"given":"Muhammad Intizar","family":"Ali","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,2,7]]},"reference":[{"issue":"2","key":"12_CR1","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1111\/j.1467-8535.2006.00531.x","volume":"37","author":"CP Lim","year":"2006","unstructured":"Lim, C.P., Nonis, D., Hedberg, J.: Gaming in a 3D multiuser virtual environment: engaging students in science lessons. Br. J. Edu. Technol. 37(2), 211\u2013231 (2006)","journal-title":"Br. J. Edu. Technol."},{"key":"12_CR2","doi-asserted-by":"crossref","unstructured":"Bowman, D.A., North, C., Chen, J., Polys, N.F., Pyla, P.S., Yilmaz, U.: Information-rich virtual environments: theory, tools, and research agenda. In: Proceedings of the ACM Symposium on Virtual Reality Software and Technology, pp. 81\u201390 (2003)","DOI":"10.1145\/1008653.1008669"},{"issue":"7","key":"12_CR3","doi-asserted-by":"publisher","first-page":"717","DOI":"10.1016\/S0957-4158(99)00032-X","volume":"9","author":"PE Jones","year":"1999","unstructured":"Jones, P.E.: Three-dimensional input device with six degrees of freedom. Mechatronics 9(7), 717\u2013729 (1999)","journal-title":"Mechatronics"},{"issue":"3","key":"12_CR4","doi-asserted-by":"publisher","first-page":"121","DOI":"10.1016\/j.cag.2012.12.003","volume":"37","author":"F Argelaguet","year":"2013","unstructured":"Argelaguet, F., Andujar, C.: A survey of 3D object selection techniques for virtual environments. Comput. Graph. 37(3), 121\u2013136 (2013)","journal-title":"Comput. Graph."},{"key":"12_CR5","doi-asserted-by":"crossref","unstructured":"Cao, X., Ju, K.P., Li, C., Jain, D.: SceneGenA11y: how can runtime generative tools improve the accessibility of a virtual 3D scene?. In: Proceedings of the Extended Abstracts of the CHI Conference on Human Factors in Computing Systems, pp. 1\u201310 (2025)","DOI":"10.1145\/3706599.3720265"},{"key":"12_CR6","unstructured":"Wong, A., Gillis, H., Peck, B.: VR accessibility survey: survey for people with disabilities (2018). https:\/\/drive.google.com\/file\/d\/0B0VwTVwReMqLMFIzdzVVaVdaTFk\/view"},{"key":"12_CR7","doi-asserted-by":"crossref","unstructured":"Te\u00f3filo, M., Lucena, V. F., Nascimento, J., Miyagawa, T., Maciel, F.: Evaluating accessibility features designed for virtual reality context. In: 2018 IEEE International Conference on Consumer Electronics (ICCE), pp. 1\u20136. IEEE (2018)","DOI":"10.1109\/ICCE.2018.8326167"},{"issue":"2","key":"12_CR8","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/1530064.1530069","volume":"2","author":"S Trewin","year":"2009","unstructured":"Trewin, S., Laff, M., Hanson, V., Cavender, A.: Exploring visual and motor accessibility in navigating a virtual world. ACM Trans. Accessible Comput. (TACCESS) 2(2), 1\u201335 (2009)","journal-title":"ACM Trans. Accessible Comput. (TACCESS)"},{"issue":"4","key":"12_CR9","first-page":"113","volume":"14","author":"JM Hollister","year":"2024","unstructured":"Hollister, J.M.: Virtual libraries in the new metaverse: an exploratory study on community libraries in Meta Horizon Worlds. Int. J. Knowl. Content Dev. Technol. 14(4), 113\u2013133 (2024)","journal-title":"Int. J. Knowl. Content Dev. Technol."},{"key":"12_CR10","doi-asserted-by":"crossref","unstructured":"Naikar, V. H., Subramanian, S., Tigwell, G. W.: Accessibility feature implementation within free vr experiences. In: Extended Abstracts of the CHI Conference on Human Factors in Computing Systems, pp. 1\u20139 (2024)","DOI":"10.1145\/3613905.3650935"},{"key":"12_CR11","doi-asserted-by":"crossref","unstructured":"Killough, D. et al.: Demonstration of VRSight: AI-driven real-time descriptions to enhance VR accessibility for blind people. In: Proceedings of the Extended Abstracts of the CHI Conference on Human Factors in Computing Systems, pp. 1\u20135 (2025)","DOI":"10.1145\/3706599.3721194"},{"key":"12_CR12","unstructured":"Killough, D. et al.: XR for all: understanding developer perspectives on accessibility integration in extended reality. arXiv preprint arXiv:2412.16321 (2024)"},{"key":"12_CR13","doi-asserted-by":"crossref","unstructured":"Desai, K., Johnson, J.: Virtex: learning visual representations from textual annotations. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11162\u201311173 (2021)","DOI":"10.1109\/CVPR46437.2021.01101"},{"key":"12_CR14","doi-asserted-by":"crossref","unstructured":"Zhang, Y. et al.: Recognize anything: a strong image tagging model. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1724\u20131732 (2024)","DOI":"10.1109\/CVPRW63382.2024.00179"},{"issue":"5","key":"12_CR15","first-page":"2287","volume":"12","author":"H Yoon","year":"2018","unstructured":"Yoon, H., Kim, B.-H., Mukhriddin, M., Cho, J.: Salient region extraction based on global contrast enhancement and saliency cut for image information recognition of the visually impaired. KSII Trans. Internet Inf. Syst. (TIIS) 12(5), 2287\u20132312 (2018)","journal-title":"KSII Trans. Internet Inf. Syst. (TIIS)"},{"key":"12_CR16","doi-asserted-by":"crossref","unstructured":"Stearns, L., Findlater, L., Froehlich, J. E.: Design of an augmented reality magnification aid for low vision users. In: Proceedings of the 20th International ACM SIGACCESS Conference on Computers and Accessibility, pp. 28\u201339 (2018)","DOI":"10.1145\/3234695.3236361"},{"key":"12_CR17","doi-asserted-by":"crossref","unstructured":"Lang, F., Machulla, T.: Pressing a button you cannot see: evaluating visual designs to assist persons with low vision through augmented reality. In: Proceedings of the 27th ACM Symposium on Virtual Reality Software and Technology, pp. 1\u201310 (2021)","DOI":"10.1145\/3489849.3489873"},{"key":"12_CR18","doi-asserted-by":"crossref","unstructured":"Deng, W., Qi, M., Ma, H.: Global-local tree search in vlms for 3d indoor scene generation. In: Proceedings of the Computer Vision and Pattern Recognition Conference, pp. 8975\u20138984 (2025)","DOI":"10.1109\/CVPR52734.2025.00839"},{"key":"12_CR19","unstructured":"Doveh, S. et al.: Teaching VLMs to localize specific objects from in-context examples. arXiv preprint arXiv:2411.13317 (2024)"},{"key":"12_CR20","doi-asserted-by":"crossref","unstructured":"Zha, J., Fan, Y., Yang, X., Gao, C., Chen, X.: How to enable llm with 3d capacity? A survey of spatial reasoning in llm. arXiv preprint arXiv:2504.05786 (2025)","DOI":"10.24963\/ijcai.2025\/1200"},{"key":"12_CR21","doi-asserted-by":"crossref","unstructured":"Wang, X. et al.: Spatial 3D-LLM: exploring spatial awareness in 3D vision-language models. arXiv preprint arXiv:2507.16524 (2025)","DOI":"10.1109\/ICME59968.2025.11208990"},{"key":"12_CR22","unstructured":"Cocchi, F. et al.: LLaVA-more: a comparative study of LLMs and visual backbones for enhanced visual instruction tuning. arXiv preprint arXiv:2503.15621 (2025)"},{"key":"12_CR23","unstructured":"Hoque, M., Hasan, M. R., Emon, M. I. S., Khalifa, F., Rahman, M. M.: Medical image interpretation with large multimodal models. In: CEUR Workshop Proceedings, vol. 3740. CEUR-WS. org (2024)"},{"key":"12_CR24","unstructured":"Zhang, S., Fang, Q., Yang, Z., Feng, Y.: Llava-mini: efficient image and video large multimodal models with one vision token. arXiv preprint arXiv:2501.03895 (2025)"},{"key":"12_CR25","doi-asserted-by":"crossref","unstructured":"Cai, M. et al.: Vip-llava: making large multimodal models understand arbitrary visual prompts. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12914\u201312923 (2024)","DOI":"10.1109\/CVPR52733.2024.01227"},{"key":"12_CR26","doi-asserted-by":"crossref","unstructured":"Howard, P., Bhiwandiwalla, A., Fraser, K. C., Kiritchenko, S.: Uncovering bias in large vision-language models with counterfactuals. arXiv preprint arXiv:2404.00166 (2024)","DOI":"10.18653\/v1\/2025.naacl-long.305"},{"key":"12_CR27","doi-asserted-by":"crossref","unstructured":"Selvam, S., Rajendran, R. K., Sankaradas, M., Raghunathan, A., Chakradhar, S. T.: SimCache: similarity caching for efficient VLM-based scene understanding. In: Proceedings of the Computer Vision and Pattern Recognition Conference, pp. 3327\u20133336 (2025)","DOI":"10.1109\/CVPRW67362.2025.00315"},{"key":"12_CR28","unstructured":"TIGER AI Lab: VIEScore: Towards Explainable Metrics for Conditional Image Synthesis Evaluation (2024). https:\/\/tiger-ai-lab.github.io\/VIEScore\/"},{"key":"12_CR29","doi-asserted-by":"crossref","unstructured":"Fernandez, J.B., Ali, M.I.: System demo of modeling smart university campus virtual environments. In: International Conference on Multimedia Modeling, pp. 218\u2013224. Springer, Singapore (2025)","DOI":"10.1007\/978-981-96-2074-6_25"}],"container-title":["Lecture Notes in Computer Science","MultiMedia Modeling"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-6963-2_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T10:08:45Z","timestamp":1770372525000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-6963-2_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819569625","9789819569632"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-6963-2_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"7 February 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"MMM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Multimedia Modeling","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Prague","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Czech Republic","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2026","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 January 2026","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31 January 2026","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"32","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"mmm2026","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/mmm2026.cz\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}