{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T14:27:45Z","timestamp":1766068065155,"version":"3.44.0"},"reference-count":37,"publisher":"Springer Science and Business Media LLC","issue":"22","license":[{"start":{"date-parts":[[2024,8,23]],"date-time":"2024-08-23T00:00:00Z","timestamp":1724371200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,8,23]],"date-time":"2024-08-23T00:00:00Z","timestamp":1724371200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"DOI":"10.1007\/s11042-024-20036-x","type":"journal-article","created":{"date-parts":[[2024,8,23]],"date-time":"2024-08-23T15:45:31Z","timestamp":1724427931000},"page":"24745-24764","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["TransEffiVisNet \u2013 an image captioning architecture for auditory assistance for the visually impaired"],"prefix":"10.1007","volume":"84","author":[{"given":"Harshitha","family":"R","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4460-6915","authenticated-orcid":false,"given":"Lakshmipriya","family":"B","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vallidevi","family":"Krishnamurthy","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,8,23]]},"reference":[{"key":"20036_CR1","unstructured":"World Health Organization (n.d.) Blindness and visual impairment. Retrieved from https:\/\/www.who.int\/news-room\/fact-sheets\/detail\/blindness-and-visual-impairment. Accessed 10 Aug 2023"},{"key":"20036_CR2","doi-asserted-by":"crossref","unstructured":"Islam RB, Akhter S, Iqbal F, Rahman MSU, Khan R (2023) Deep learning based object detection and surrounding environment description for visually impaired people. Heliyon 9(6):e16924","DOI":"10.1016\/j.heliyon.2023.e16924"},{"key":"20036_CR3","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2022.118720","volume":"212","author":"B Kuriakose","year":"2023","unstructured":"Kuriakose B, Shrestha R, Sandnes FE (2023) DeepNAVI: A deep learning-based smartphone navigation assistant for people with visual impairments. Expert Syst Appl 212:118720","journal-title":"Expert Syst Appl"},{"key":"20036_CR4","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2022.105188","volume":"114","author":"S Alashhab","year":"2022","unstructured":"Alashhab S, Gallego AJ, Lozano M\u00c1 (2022) Efficient gesture recognition for the assistance of visually impaired people using multi-head neural networks. Eng Appl Artif Intell 114:105188","journal-title":"Eng Appl Artif Intell"},{"key":"20036_CR5","doi-asserted-by":"publisher","DOI":"10.1016\/j.advengsoft.2022.103362","volume":"176","author":"A Nagarajan","year":"2023","unstructured":"Nagarajan A, Gopinath MP (2023) Hybrid optimization-enabled deep learning for indoor object detection and distance estimation to assist visually impaired persons. Adv Eng Softw 176:103362","journal-title":"Adv Eng Softw"},{"issue":"2","key":"20036_CR6","doi-asserted-by":"publisher","DOI":"10.1016\/j.asej.2023.102387","volume":"15","author":"AB Atitallah","year":"2024","unstructured":"Atitallah AB, Said Y, Atitallah MAB, Albekairi M, Kaaniche K, Boubaker S (2024) An effective obstacle detection system using deep learning advantages to aid blind and visually impaired navigation. Ain Shams Eng J 15(2):102387","journal-title":"Ain Shams Eng J"},{"key":"20036_CR7","doi-asserted-by":"crossref","unstructured":"Ashiq F, Asif M, Ahmad MB, Zafar S, Masood K, Mahmood T, ... Lee IH (2022) CNN-based object recognition and tracking system to assist visually impaired people. IEEE Access 10:14819\u201314834","DOI":"10.1109\/ACCESS.2022.3148036"},{"key":"20036_CR8","doi-asserted-by":"publisher","unstructured":"Ali ZA (2023) Design and evaluation of two obstacle detection devices for visually impaired people. J Eng Res 100132. https:\/\/doi.org\/10.1016\/j.jer.2023.100132","DOI":"10.1016\/j.jer.2023.100132"},{"key":"20036_CR9","unstructured":"Seeing AI (n.d.) Microsoft garage. From https:\/\/www.microsoft.com\/en-us\/garage\/wall-of-fame\/seeing-ai.\u00a0Accessed 8 Jul 2024"},{"key":"20036_CR10","unstructured":"Khoury RE (2018) Envision AI helps visually impaired people better see and read the world. Android Police. https:\/\/www.androidpolice.com\/2018\/08\/15\/envision-ai-helps-visually-impaired-people-better-see-read-world\/#:~:text=Envision%20is%20a%20tool%20that%20uses%20artificial%20intelligence. Accessed 08 Jul 2024"},{"key":"20036_CR11","unstructured":"Google Lookout uses AI to describe surroundings for the visually impaired. ZDNET. https:\/\/www.zdnet.com\/article\/google-lookout-uses-ai-to-describe-surroundings-for-the-visually-impaired\/. Accessed 08 Jul 2024"},{"key":"20036_CR12","unstructured":"Unlocking independence: explore the SuperSense app for the visually impaired. https:\/\/www.toolify.ai\/ai-news\/unlocking-independence-explore-the-supersense-app-for-the-visually-impaired-2253971#:~:text=%F0%9F%8C%9F%20Highlights%201%20World%20Services%20for%20the%20Blind. Accessed 29 Feb 2024"},{"key":"20036_CR13","doi-asserted-by":"crossref","unstructured":"Lin T-Y, Maire M, Belongie S et al (2014) Microsoft COCO: common objects in context. Computer Vision-ECCV 2014(5):740\u2013755","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"20036_CR14","doi-asserted-by":"publisher","DOI":"10.1016\/j.rineng.2023.101107","volume":"18","author":"AM Rinaldi","year":"2023","unstructured":"Rinaldi AM, Russo C, Tommasino C (2023) Automatic image captioning combining natural language processing and deep neural networks. Results Eng 18:101107","journal-title":"Results Eng"},{"key":"20036_CR15","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2023.103857","volume":"237","author":"M Cornia","year":"2023","unstructured":"Cornia M, Baraldi L, Tal A, Cucchiara R (2023) Fully-attentive iterative networks for region-based controllable image and video captioning. Comput Vis Image Underst 237:103857","journal-title":"Comput Vis Image Underst"},{"key":"20036_CR16","doi-asserted-by":"publisher","unstructured":"Ueda A, Yang W, Sugiura K (2023) Switching text-based image encoders for captioning images with text. IEEE Access. https:\/\/doi.org\/10.1109\/access.2023.3282444","DOI":"10.1109\/access.2023.3282444"},{"key":"20036_CR17","doi-asserted-by":"publisher","first-page":"686","DOI":"10.1016\/j.procs.2023.01.049","volume":"218","author":"AK Poddar","year":"2023","unstructured":"Poddar AK, Rani R (2023) Hybrid architecture using CNN and LSTM for image captioning in Hindi language. Procedia Comput Sci 218:686\u2013696","journal-title":"Procedia Comput Sci"},{"issue":"3","key":"20036_CR18","doi-asserted-by":"publisher","first-page":"1985","DOI":"10.1109\/TGRS.2019.2951636","volume":"58","author":"X Lu","year":"2019","unstructured":"Lu X, Wang B, Zheng X (2019) Sound active attention framework for remote sensing image captioning. IEEE Trans Geosci Remote Sens 58(3):1985\u20132000","journal-title":"IEEE Trans Geosci Remote Sens"},{"key":"20036_CR19","unstructured":"University of Illinois at Urbana-Champaign (n.d.) Flickr 8K dataset. http:\/\/nlp.cs.illinois.edu\/HockenmaierGroup\/Framing_Image_Description\/KCCA.html. Accessed 4 Dec 2023"},{"key":"20036_CR20","first-page":"1","volume":"2020","author":"Y Chu","year":"2020","unstructured":"Chu Y, Yue X, Yu L, Sergei M, Wang Z (2020) Automatic image captioning based on ResNet50 and LSTM with soft attention. Wirel Commun Mob Comput 2020:1\u20137","journal-title":"Wirel Commun Mob Comput"},{"key":"20036_CR21","doi-asserted-by":"crossref","unstructured":"Li P, Zhang M, Lin P, Wan J, Jiang M (2022) Visual-text reference pretraining model for image captioning. Comput Intell Neurosci 2022:9400999","DOI":"10.1155\/2022\/9400999"},{"key":"20036_CR22","doi-asserted-by":"publisher","unstructured":"Yanagimoto H, Shozu M (2020) Multiple perspective caption generation with attention mechanism. In: 2020 9th international congress on advanced applied informatics (IIAI-AAI). IEEE, pp 110\u2013115. https:\/\/doi.org\/10.1109\/IIAI-AAI50415.2020.00031","DOI":"10.1109\/IIAI-AAI50415.2020.00031"},{"key":"20036_CR23","doi-asserted-by":"publisher","unstructured":"Luo RC, Hsu YT, Wen YC, Ye HJ (2019) Visual image caption generation for service robotics and industrial applications. In: 2019 IEEE international conference on industrial cyber physical systems (ICPS). IEEE, pp 827\u2013832. https:\/\/doi.org\/10.1109\/ICPHYS.2019.8780171","DOI":"10.1109\/ICPHYS.2019.8780171"},{"key":"20036_CR24","doi-asserted-by":"crossref","unstructured":"Hoxha G, Melgani F (2020) Remote sensing image captioning with SVM-based decoding. In: IGARSS 2020\u20132020 IEEE International Geoscience and Remote Sensing Symposium. IEEE, pp 6734\u20136737","DOI":"10.1109\/IGARSS39084.2020.9323651"},{"issue":"9","key":"20036_CR25","doi-asserted-by":"publisher","DOI":"10.1016\/j.jksuci.2023.101750","volume":"35","author":"A Alsayed","year":"2023","unstructured":"Alsayed A, Qadah TM, Arif M (2023) A performance analysis of transformer- based deep learning models for Arabic image captioning. J King Saud Univ-Comput Inform Sci 35(9):101750","journal-title":"J King Saud Univ-Comput Inform Sci"},{"key":"20036_CR26","unstructured":"Grubinger M, Clough P, Muller H, Deselaers T (2006) The iapr benchark: a new evaluation resource for visual information systems. In: International Conference on Language Resources and Evaluation"},{"key":"20036_CR27","doi-asserted-by":"crossref","unstructured":"Yan F, Mikolajczyk K (2015) Deep correlation for matching images and text. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. pp 3441\u20133450","DOI":"10.1109\/CVPR.2015.7298966"},{"key":"20036_CR28","doi-asserted-by":"crossref","unstructured":"Kwon H, Lee S (2022) Toward backdoor attacks for image captioning model in deep neural networks. Secur Commun Netw 2022:1525052","DOI":"10.1155\/2022\/1525052"},{"key":"20036_CR29","doi-asserted-by":"crossref","unstructured":"Runyan D, Wenkai Z, Zhi G, Xian S (2023) A survey on learning objects\u2019 relationship for image captioning. Comput Intell Neurosci 2023:8600853","DOI":"10.1155\/2023\/8600853"},{"key":"20036_CR30","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2022.104570","volume":"128","author":"Z Wang","year":"2022","unstructured":"Wang Z, Shi S, Zhai Z, Wu Y, Yang R (2022) ArCo: attention-reinforced transformer with contrastive learning for image captioning. Image Vis Comput 128:104570","journal-title":"Image Vis Comput"},{"key":"20036_CR31","doi-asserted-by":"crossref","unstructured":"Kavitha R, Sandhya SS, Betes P, Rajalakshmi P, Sarubala E (2023) Deep learning-based image captioning for visually impaired people. In: E3S Web of Conferences, vol. 399. EDP Sciences, p 04005","DOI":"10.1051\/e3sconf\/202339904005"},{"key":"20036_CR32","doi-asserted-by":"crossref","unstructured":"Ahsan H, Bhatt d, Shah K, Bhalla N (2021) Multi-modal image captioning for the visually impaired. In: Proceedings of the 2021 conference of the North American chapter of the association for computational linguistics: student research workshop, online. Association for Computational Linguistics, pp 53\u201360","DOI":"10.18653\/v1\/2021.naacl-srw.8"},{"key":"20036_CR33","doi-asserted-by":"crossref","unstructured":"Yousif AJ, Al-Jammas MH (2023) Exploring deep learning approaches for video captioning: a comprehensive review. e-Prime-Adv Electr Eng Electron Energy 6:100372","DOI":"10.1016\/j.prime.2023.100372"},{"key":"20036_CR34","unstructured":"Nguyen T, Gadre SY, Ilharco G, Oh S, Schmidt L (2024) Improving multimodal datasets with image captioning. Adv Neural Inf Proces Syst 36"},{"key":"20036_CR35","doi-asserted-by":"publisher","unstructured":"Shao Z, Han J, Debattista K, Pang Y (2024) DCMSTRD: end-to-end dense captioning via multi-scale transformer decoding. IEEE Trans Multimedia. https:\/\/doi.org\/10.1109\/tmm.2024.3369863","DOI":"10.1109\/tmm.2024.3369863"},{"key":"20036_CR36","doi-asserted-by":"publisher","first-page":"8753","DOI":"10.1109\/TMM.2023.3241517","volume":"25","author":"Z Shao","year":"2023","unstructured":"Shao Z, Han J, Debattista K, Pang Y (2023) Textual context-aware dense captioning with diverse words. IEEE Trans Multimedia 25:8753\u20138766","journal-title":"IEEE Trans Multimedia"},{"key":"20036_CR37","unstructured":"Shao Z, Han J, Marnerides D, Debattista K (2022) Region-object relation-aware dense captioning via transformer. IEEE Trans Neural Netw Learn Syst"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-024-20036-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-024-20036-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-024-20036-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,5]],"date-time":"2025-09-05T20:54:38Z","timestamp":1757105678000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-024-20036-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,23]]},"references-count":37,"journal-issue":{"issue":"22","published-online":{"date-parts":[[2025,7]]}},"alternative-id":["20036"],"URL":"https:\/\/doi.org\/10.1007\/s11042-024-20036-x","relation":{},"ISSN":["1573-7721"],"issn-type":[{"type":"electronic","value":"1573-7721"}],"subject":[],"published":{"date-parts":[[2024,8,23]]},"assertion":[{"value":"23 April 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 July 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 July 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 August 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]}}