{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,28]],"date-time":"2026-01-28T00:51:02Z","timestamp":1769561462273,"version":"3.49.0"},"reference-count":21,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T00:00:00Z","timestamp":1769472000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T00:00:00Z","timestamp":1769472000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SN COMPUT. SCI."],"DOI":"10.1007\/s42979-025-04676-w","type":"journal-article","created":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T12:33:54Z","timestamp":1769517234000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Advancing Bengali Vision-Language Comprehension: A Deep Learning and NLP Paradigm for Image Captioning"],"prefix":"10.1007","volume":"7","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4872-9901","authenticated-orcid":false,"given":"Dipankar","family":"Dey","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2297-6576","authenticated-orcid":false,"given":"Dipak Kumar","family":"Jana","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Prajna","family":"Bhunia","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,1,27]]},"reference":[{"key":"4676_CR1","doi-asserted-by":"publisher","first-page":"1381","DOI":"10.1016\/j.procs.2020.04.148","volume":"171","author":"R Sharmin","year":"2020","unstructured":"Sharmin R, Rahut SK, Huq MR. Bengali spoken digit classification: a deep learning approach using convolutional neural network. Procedia Comput Sci. 2020;171:1381\u20138. https:\/\/doi.org\/10.1016\/j.procs.2020.04.148.","journal-title":"Procedia Comput Sci"},{"issue":"Part A","key":"4676_CR2","doi-asserted-by":"publisher","DOI":"10.1016\/j.ijfatigue.2022.107324","volume":"167","author":"X Sun","year":"2023","unstructured":"Sun X, Zhou T, Song K, Chen X. An image recognition based multiaxial low-cycle fatigue life prediction method with CNN model. Int J Fatigue. 2023;167(Part A):107324. https:\/\/doi.org\/10.1016\/j.ijfatigue.2022.107324.","journal-title":"Int J Fatigue"},{"key":"4676_CR3","doi-asserted-by":"publisher","DOI":"10.1016\/j.array.2022.100244","volume":"15","author":"MSH Onim","year":"2022","unstructured":"Onim MSH, Nyeem H, Roy K, Hasan M, Ishmam A, Akif MAH, et al. BLPnet: a new DNN model and Bengali OCR engine for Automatic Licence Plate Recognition. Array. 2022;15:100244. https:\/\/doi.org\/10.1016\/j.array.2022.100244.","journal-title":"Array"},{"issue":"5","key":"4676_CR4","doi-asserted-by":"publisher","first-page":"997","DOI":"10.1016\/S0031-3203(01)00089-9","volume":"35","author":"AFR Rahman","year":"2002","unstructured":"Rahman AFR, Rahman R, Fairhurst MC. Recognition of handwritten Bengali characters: a novel multistage approach. Pattern Recogn. 2002;35(5):997\u20131006. https:\/\/doi.org\/10.1016\/S0031-3203(01)00089-9.","journal-title":"Pattern Recogn"},{"issue":"8","key":"4676_CR5","doi-asserted-by":"publisher","first-page":"771","DOI":"10.1016\/S0167-8655(99)00041-0","volume":"20","author":"S Sural","year":"1999","unstructured":"Sural S, Das PK. An MLP using Hough transform based fuzzy feature extraction for Bengali script recognition. Pattern Recogn Lett. 1999;20(8):771\u201382. https:\/\/doi.org\/10.1016\/S0167-8655(99)00041-0.","journal-title":"Pattern Recogn Lett"},{"issue":"6","key":"4676_CR6","doi-asserted-by":"publisher","first-page":"2610","DOI":"10.1016\/j.jksuci.2020.03.002","volume":"34","author":"A Sufian","year":"2022","unstructured":"Sufian A, Ghosh A, Naskar A, Sultana F, Sil J, Hafizur Rahman MM. BDNet: bengali handwritten numeral digit recognition based on densely connected convolutional neural networks. J King Saud Univ - Comput Inf Sci. 2022;34(6):2610\u20132. https:\/\/doi.org\/10.1016\/j.jksuci.2020.03.002.","journal-title":"J King Saud Univ - Comput Inf Sci"},{"key":"4676_CR7","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2021.115394","volume":"184","author":"MR Hossain","year":"2021","unstructured":"Hossain MR, Hoque MM, Siddique N, Sarker IH. Bengali text document categorization based on very deep convolution neural network. Expert Syst Appl. 2021;184:115394. https:\/\/doi.org\/10.1016\/j.eswa.2021.115394.","journal-title":"Expert Syst Appl"},{"key":"4676_CR8","doi-asserted-by":"publisher","first-page":"203","DOI":"10.1016\/j.patcog.2019.03.030","volume":"92","author":"R Ghosh","year":"2019","unstructured":"Ghosh R, Vamshi C, Kumar P. RNN based online handwritten word recognition in Devanagari and Bengali scripts using horizontal zoning. Pattern Recogn. 2019;92:203\u201318. https:\/\/doi.org\/10.1016\/j.patcog.2019.03.030.","journal-title":"Pattern Recogn"},{"key":"4676_CR9","doi-asserted-by":"publisher","DOI":"10.1016\/j.apacoust.2021.108274","volume":"182","author":"HS Das","year":"2021","unstructured":"Das HS, Roy P. A CNN-BiLSTM based hybrid model for Indian language identification. Appl Acoust. 2021;182:108274. https:\/\/doi.org\/10.1016\/j.apacoust.2021.108274.","journal-title":"Appl Acoust"},{"key":"4676_CR10","doi-asserted-by":"publisher","DOI":"10.1016\/j.imu.2022.101077","volume":"33","author":"MM Islam","year":"2022","unstructured":"Islam MM, Uddin MR, AKhtar MN, Rafiqul Alam KM. Recognizing multiclass Static Sign Language words for deaf and dumb people of Bangladesh based on transfer learning techniques. Inform Med Unlocked. 2022;33:101077. https:\/\/doi.org\/10.1016\/j.imu.2022.101077.","journal-title":"Inform Med Unlocked"},{"key":"4676_CR11","doi-asserted-by":"publisher","first-page":"2554","DOI":"10.1016\/j.procs.2020.03.309","volume":"167","author":"S Ahlawat","year":"2020","unstructured":"Ahlawat S, Choudhary A. Hybrid CNN-SVM classifier for handwritten digit recognition. Procedia Comput Sci. 2020;167:2554\u201360. https:\/\/doi.org\/10.1016\/j.procs.2020.03.309.","journal-title":"Procedia Comput Sci"},{"issue":"Part B","key":"4676_CR12","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2022.118914","volume":"213","author":"S Das","year":"2023","unstructured":"Das S, Imtiaz MS, Neom NH, Siddique N, Wang H. A hybrid approach for Bangla sign language recognition using deep transfer learning model with random forest classifier. Expert Syst Appl. 2023;213(Part B):118914. https:\/\/doi.org\/10.1016\/j.eswa.2022.118914.","journal-title":"Expert Syst Appl"},{"key":"4676_CR13","doi-asserted-by":"publisher","first-page":"528","DOI":"10.1016\/j.procs.2018.10.426","volume":"143","author":"ASA Rabby","year":"2018","unstructured":"Rabby ASA, Haque S, Islam S, Abujar S, Hossain SA. BornoNet: bangla handwritten characters recognition using convolutional neural network. Procedia Comput Sci. 2018;143:528\u201335. https:\/\/doi.org\/10.1016\/j.procs.2018.10.426.","journal-title":"Procedia Comput Sci"},{"key":"4676_CR14","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2022.101448","volume":"78","author":"M Antony Robert Raj","year":"2023","unstructured":"Antony Robert Raj M, Abirami S, Shyni SM. Tamil handwritten character recognition system using statistical algorithmic approaches. Comput Speech Lang. 2023;78:101448. https:\/\/doi.org\/10.1016\/j.csl.2022.101448.","journal-title":"Comput Speech Lang"},{"issue":"3","key":"4676_CR15","doi-asserted-by":"publisher","DOI":"10.1016\/j.neuri.2021.100016","volume":"2","author":"SD Pande","year":"2022","unstructured":"Pande SD, Jadhav PP, Joshi R, Sawant AD, Muddebihalkar V, Rathod S, et al. Digitization of handwritten Devanagari text using CNN transfer learning \u2013 A better customer service support. Neuroscience Informatics. 2022;2(3):100016. https:\/\/doi.org\/10.1016\/j.neuri.2021.100016.","journal-title":"Neuroscience Informatics"},{"issue":"17","key":"4676_CR16","doi-asserted-by":"publisher","DOI":"10.1016\/j.heliyon.2024.e36272","volume":"10","author":"A Bhuiyan","year":"2024","unstructured":"Bhuiyan A, Hossain E, Hoque MM, Ali Akber Dewan M. Enhancing image caption generation through context-aware attention mechanism. Heliyon. 2024;10(17):e36272. https:\/\/doi.org\/10.1016\/j.heliyon.2024.e36272.","journal-title":"Heliyon"},{"key":"4676_CR17","doi-asserted-by":"publisher","DOI":"10.1016\/j.compag.2023.107863","volume":"209","author":"S Wang","year":"2023","unstructured":"Wang S, Zeng Q, Ni W, Cheng C, Wang Y. ODP-Transformer: interpretation of pest classification results using image caption generation techniques. Comput Electron Agric. 2023;209:107863. https:\/\/doi.org\/10.1016\/j.compag.2023.107863.","journal-title":"Comput Electron Agric"},{"key":"4676_CR18","doi-asserted-by":"publisher","DOI":"10.1016\/j.displa.2023.102490","volume":"79","author":"R Yang","year":"2023","unstructured":"Yang R, Cui X, Qin Q, Deng Z, Lan R, Luo X. Fast RF-UIC: a fast unsupervised image captioning model. Displays. 2023;79:102490. https:\/\/doi.org\/10.1016\/j.displa.2023.102490.","journal-title":"Displays"},{"key":"4676_CR19","doi-asserted-by":"publisher","DOI":"10.1007\/s42979-023-01671-x","author":"B Das","year":"2023","unstructured":"Das B, Pal R, Majumder M, Phadikar S, Sekh AA. A visual attention-based model for bengali image captioning. SN Comput Sci. 2023. https:\/\/doi.org\/10.1007\/s42979-023-01671-x.","journal-title":"SN Comput Sci"},{"key":"4676_CR20","unstructured":"Kim W, Son B, Kim I. ViLT: vision-and-language transformer without convolution or region supervision. arXiv preprint arXiv:2102.03334. 2021."},{"key":"4676_CR21","unstructured":"Devlin J, Chang MW, Lee K, Toutanova K. BERT: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805. 2018."}],"container-title":["SN Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-025-04676-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s42979-025-04676-w","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-025-04676-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T12:34:01Z","timestamp":1769517241000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s42979-025-04676-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1,27]]},"references-count":21,"journal-issue":{"issue":"2","published-online":{"date-parts":[[2026,2]]}},"alternative-id":["4676"],"URL":"https:\/\/doi.org\/10.1007\/s42979-025-04676-w","relation":{},"ISSN":["2661-8907"],"issn-type":[{"value":"2661-8907","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,1,27]]},"assertion":[{"value":"25 April 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 December 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 January 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors affirm that they have no known financial or interpersonal conflicts that may have looked to have influenced the research presented in this study. The authors state that they do not have any known competing financial interests or personal ties that may seem to have influenced the work disclosed in this study.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"141"}}