{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T07:59:31Z","timestamp":1776931171388,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":35,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,13]]},"DOI":"10.1145\/3716553.3750763","type":"proceedings-article","created":{"date-parts":[[2025,10,11]],"date-time":"2025-10-11T13:13:16Z","timestamp":1760188396000},"page":"191-199","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Multimodal LLM using Federated Visual Instruction Tuning for Visually Impaired"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-8127-2661","authenticated-orcid":false,"given":"Ankith","family":"Bala","sequence":"first","affiliation":[{"name":"Department of Computer Science and Engineering, University at Buffalo, Buffalo, NY, USA and Radial Ventures, Buffalo, NY, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0612-4883","authenticated-orcid":false,"given":"Alina","family":"Vereshchaka","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, University at Buffalo, Buffalo, New York, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,10,12]]},"reference":[{"key":"e_1_3_3_1_2_2","unstructured":"Josh Achiam Steven Adler Sandhini Agarwal Lama Ahmad Ilge Akkaya Florencia\u00a0Leoni Aleman Diogo Almeida Janko Altenschmidt Sam Altman Shyamal Anadkat et\u00a0al. 2023. GPT-4 Technical Report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2303.08774 abs\/2303.08774 2303.08774 (2023) 1\u2013100. https:\/\/arxiv.org\/abs\/2303.08774 Accessed: 2025-08-01."},{"key":"e_1_3_3_1_3_2","unstructured":"Amazon Web Services. 2023. Amazon Polly: Text-to-Speech Service. https:\/\/aws.amazon.com\/polly\/. Accessed: 2025-08-01."},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58577-8_7"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3641989"},{"key":"e_1_3_3_1_6_2","unstructured":"Chaoyou Fu Peixian Chen Yunhang Shen Yulei Qin Mengdan Zhang Xu Lin Jinrui Yang Xiawu Zheng Ke Li Xing Sun Yunsheng Wu and Rongrong Ji. 2024. MME: A Comprehensive Evaluation Benchmark for Multimodal Large Language Models. arxiv:https:\/\/arXiv.org\/abs\/2306.13394\u00a0[cs.CV] https:\/\/arxiv.org\/abs\/2306.13394 arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2306.13394 Accessed: 2025-08-01."},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01455"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"publisher","DOI":"10.1145\/3663548.3675637"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01363"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00380"},{"key":"e_1_3_3_1_11_2","volume-title":"International Conference on Learning Representations (ICLR)","author":"Han Song","year":"2016","unstructured":"Song Han, Huizi Mao, and William\u00a0J. Dally. 2016. Deep Compression: Compressing Deep Neural Networks with Pruning, Trained Quantization and Huffman Coding. In International Conference on Learning Representations (ICLR). https:\/\/arxiv.org\/abs\/1510.00149 Presented at ICLR 2016."},{"key":"e_1_3_3_1_12_2","unstructured":"Edward\u00a0J. Hu Yelong Shen Phillip Wallis Zeyuan Allen-Zhu Yuanzhi Li Shean Wang Lu Wang and Weizhu Chen. 2021. LoRA: Low-Rank Adaptation of Large Language Models. arxiv:https:\/\/arXiv.org\/abs\/2106.09685\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2106.09685 Accessed: 2025-08-01."},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00286"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","unstructured":"Peter Kairouz H.\u00a0Brendan McMahan Brendan Avent Aur\u00e9lien Bellet Mehdi Bennis Arjun\u00a0Nitin Bhagoji Kallista Bonawitz Zachary Charles Graham Cormode Rachel Cummings et\u00a0al. 2021. Advances and Open Problems in Federated Learning. Foundations and Trends in Machine Learning 14 1\u20132 (2021) 1\u2013210. 10.1561\/2200000083","DOI":"10.1561\/2200000083"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"publisher","unstructured":"Bineeth Kuriakose Raju Shrestha and Frode\u00a0Eika Sandnes. 2023. DeepNAVI: A Deep Learning\u2011Based Smartphone Navigation Assistant for People with Visual Impairments. Expert Systems with Applications 212 (2023) 118720. 10.1016\/j.eswa.2022.118720","DOI":"10.1016\/j.eswa.2022.118720"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58577-88"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02484"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","unstructured":"Haotian Liu Chunyuan Li Qingyang Wu and Yong\u00a0Jae Lee. 2023. Visual Instruction Tuning. CoRR abs\/2304.08485 (2023). 10.48550\/arXiv.2304.08485","DOI":"10.48550\/arXiv.2304.08485"},{"key":"e_1_3_3_1_19_2","volume-title":"Proceedings of the 33rd International Conference on Neural Information Processing Systems (NeurIPS 2019)","author":"Lu Jiasen","year":"2019","unstructured":"Jiasen Lu, Dhruv Batra, Devi Parikh, and Stefan Lee. 2019. ViLBERT: Pretraining Task-Agnostic Visiolinguistic Representations for Vision-and-Language Tasks. In Proceedings of the 33rd International Conference on Neural Information Processing Systems (NeurIPS 2019). Curran Associates, Inc., Vancouver, BC, Canada."},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1145\/3025453.3025814"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00331"},{"key":"e_1_3_3_1_22_2","unstructured":"Leland McInnes John Healy and James Melville. 2018. UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1802.03426 (2018). arxiv:https:\/\/arXiv.org\/abs\/1802.03426\u00a0[stat.ML] https:\/\/arxiv.org\/abs\/1802.03426 Accessed: 2025-08-01."},{"key":"e_1_3_3_1_23_2","first-page":"1273","volume-title":"Proceedings of the 20th International Conference on Artificial Intelligence and Statistics (AISTATS)","volume":"54","author":"McMahan H.\u00a0Brendan","year":"2017","unstructured":"H.\u00a0Brendan McMahan, Eider Moore, Daniel Ramage, Seth Hampson, and Blaise Aguera\u00a0y Arcas. 2017. Communication\u2011Efficient Learning of Deep Networks from Decentralized Data. In Proceedings of the 20th International Conference on Artificial Intelligence and Statistics (AISTATS) , Vol.\u00a054. PMLR, Fort Lauderdale, FL, USA, 1273\u20131282. https:\/\/proceedings.mlr.press\/v54\/mcmahan17a.html"},{"key":"e_1_3_3_1_24_2","series-title":"Proceedings of Machine Learning Research","first-page":"28492","volume-title":"Proceedings of the 40th International Conference on Machine Learning","volume":"202","author":"Radford Alec","year":"2023","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Tao Xu, Greg Brockman, Christine McLeavey, and Ilya Sutskever. 2023. Robust Speech Recognition via Large-Scale Weak Supervision. In Proceedings of the 40th International Conference on Machine Learning(Proceedings of Machine Learning Research, Vol.\u00a0202). PMLR, Honolulu, Hawaii, USA, 28492\u201328518. https:\/\/proceedings.mlr.press\/v202\/radford23a.html"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1410"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1514"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"publisher","unstructured":"Surya Thapa and Xiaolong Fan. 2021. Privacy\u2011Preserving Machine Learning: Threats and Solutions. Information Fusion 70 (2021) 54\u201384. 10.1016\/j.inffus.2021.01.008","DOI":"10.1016\/j.inffus.2021.01.008"},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"publisher","DOI":"10.1145\/3338501.3357370"},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","unstructured":"Ethan Waisberg Joshua Ong Mouayad Masalkhi Nasif Zaman Prithul Sarker Andrew\u00a0G. Lee and Alireza Tavakkoli. 2024. Meta Smart Glasses\u2014Large Language Models and the Future for Assistive Glasses for Individuals with Vision Impairments. Eye 38 6 (2024) 1036\u20131038. 10.1038\/s41433-024-02889-9","DOI":"10.1038\/s41433-024-02889-9"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"crossref","unstructured":"H\u00e9l\u00e8ne Walle Cyril De\u00a0Runz Barth\u00e9lemy Serres and Gilles Venturini. 2022. A Survey on Recent Advances in AI and Vision\u2011Based Methods for Helping and Guiding Visually Impaired People. Applied Sciences 12 5 (2022) 2308.","DOI":"10.3390\/app12052308"},{"key":"e_1_3_3_1_31_2","volume-title":"World Report on Vision","author":"Organization World Health","year":"2019","unstructured":"World Health Organization. 2019. World Report on Vision. Technical Report. World Health Organization, Geneva, Switzerland. https:\/\/www.who.int\/publications\/i\/item\/9789241516570 ISBN: 978\u201192\u20114\u2011151657\u20110; accessed 2025\u201108\u201101."},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.521"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","unstructured":"Peng Xu Xiatian Zhu and David\u00a0A. Clifton. 2023. Multimodal learning with transformers: A survey. IEEE Transactions on Pattern Analysis and Machine Intelligence 45 10 (2023) 12113\u201312132. 10.1109\/TPAMI.2022.3206513","DOI":"10.1109\/TPAMI.2022.3206513"},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01833"},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"publisher","unstructured":"Katerina Zdravkova Venera Krasniqi Fisnik Dalipi and Mexhid Ferati. 2022. Cutting\u2011Edge Communication and Learning Assistive Technologies for Disabled Children: An Artificial Intelligence Perspective. Frontiers in Artificial Intelligence 5 (2022) 970430. 10.3389\/frai.2022.970430","DOI":"10.3389\/frai.2022.970430"},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10447454"}],"event":{"name":"ICMI '25: International Conference on Multimodal Interaction","location":"Canberra Australia","acronym":"ICMI '25","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 27th International Conference on Multimodal Interaction"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3716553.3750763","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,26]],"date-time":"2026-01-26T22:28:02Z","timestamp":1769466482000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3716553.3750763"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,12]]},"references-count":35,"alternative-id":["10.1145\/3716553.3750763","10.1145\/3716553"],"URL":"https:\/\/doi.org\/10.1145\/3716553.3750763","relation":{},"subject":[],"published":{"date-parts":[[2025,10,12]]},"assertion":[{"value":"2025-10-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}