{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T12:45:42Z","timestamp":1777639542597,"version":"3.51.4"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"21","license":[{"start":{"date-parts":[[2022,4,5]],"date-time":"2022-04-05T00:00:00Z","timestamp":1649116800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,4,5]],"date-time":"2022-04-05T00:00:00Z","timestamp":1649116800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001809","name":"China's National Natural Science Foundation","doi-asserted-by":"crossref","award":["61671418"],"award-info":[{"award-number":["61671418"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2022,9]]},"DOI":"10.1007\/s11042-022-12291-7","type":"journal-article","created":{"date-parts":[[2022,4,5]],"date-time":"2022-04-05T05:02:26Z","timestamp":1649134946000},"page":"29955-29975","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Deep neural combinational model (DNCM): digital image descriptor for child\u2019s independent learning"],"prefix":"10.1007","volume":"81","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5265-6026","authenticated-orcid":false,"given":"Nuzhat","family":"Naqvi","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"M. Shujah","family":"Islam","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mansoor","family":"Iqbal","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shamsa","family":"Kanwal","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Asad","family":"Khan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"ZhongFu","family":"Ye","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,4,5]]},"reference":[{"key":"12291_CR1","doi-asserted-by":"publisher","first-page":"279","DOI":"10.2307\/145736","volume":"27","author":"WS Barnett","year":"1992","unstructured":"Barnett WS (1992) Benefits of compensatory preschool education. J Hum Resour 27:279\u2013312","journal-title":"J Hum Resour"},{"key":"12291_CR2","unstructured":"Callison-Burch C, Osborne M, Koehn P (2006) Re-evaluation of the role of bleu in machine translation research. In: 11th Conference of the European Chapter of the Association for Computational Linguistics"},{"key":"12291_CR3","doi-asserted-by":"publisher","first-page":"2959","DOI":"10.1007\/s11042-017-4593-1","volume":"77","author":"YS Chang","year":"2018","unstructured":"Chang YS (2018) Fine-grained attention for image caption generation. Multimed Tools Appl 77:2959\u20132971","journal-title":"Multimed Tools Appl"},{"key":"12291_CR4","doi-asserted-by":"crossref","unstructured":"Chen L, Zhang H, Xiao J, Nie L, Shao J, Liu W, Chua TS (2017) Sca-CNN: spatial and channel-wise attention in convolutional networks for image captioning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. pp 5659\u20135667","DOI":"10.1109\/CVPR.2017.667"},{"key":"12291_CR5","unstructured":"Chen J, Dong W, Li M Image caption generator based on deep neural networks"},{"key":"12291_CR6","doi-asserted-by":"publisher","first-page":"242","DOI":"10.1016\/j.patcog.2018.02.017","volume":"79","author":"Q Cheng","year":"2018","unstructured":"Cheng Q, Zhang Q, Fu P, Tu C, Li S (2018) A survey and analysis on automatic image annotation. Pattern Recogn 79:242\u2013259","journal-title":"Pattern Recogn"},{"key":"12291_CR7","doi-asserted-by":"crossref","unstructured":"Cui Y, Yang G, Veit A, Huang X, Belongie S (2018) Learning to evaluate image captioning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. pp 5804\u20135812","DOI":"10.1109\/CVPR.2018.00608"},{"key":"12291_CR8","doi-asserted-by":"crossref","unstructured":"Degadwala S, Vyas D, Biswas H, Chakraborty U, Saha S (2021) Image captioning using inception V3 transfer learning model. In: 2021 6th International Conference on Communication and Electronics Systems (ICCES). IEEE, pp 1103\u20131108","DOI":"10.1109\/ICCES51350.2021.9489111"},{"key":"12291_CR9","unstructured":"Denoual E, Lepage Y (2005) BLEU in characters: towards automatic MT evaluation in languages without word delimiters. In: Companion Volume to the Proceedings of Conference including Posters\/Demos and Tutorial Abstracts"},{"key":"12291_CR10","doi-asserted-by":"crossref","unstructured":"Donahue J, Hendricks LA, Guadarrama S, Rohrbach M, Venugopalan S, Saenko K, Darrell T (2015) Long-term recurrent convolutional networks for visual recognition and description. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp 2625\u20132634","DOI":"10.1109\/CVPR.2015.7298878"},{"key":"12291_CR11","first-page":"15","volume-title":"European conference on computer vision","author":"A Farhadi","year":"2010","unstructured":"Farhadi A, Hejrati M, Sadeghi MA, Young P, Rashtchian C, Hockenmaier J, Forsyth D (2010) Every picture tells a story: generating sentences from images. In: European conference on computer vision. Springer, Berlin, pp 15\u201329"},{"issue":"12","key":"12291_CR12","doi-asserted-by":"publisher","first-page":"2321","DOI":"10.1109\/TPAMI.2016.2642953","volume":"39","author":"K Fu","year":"2017","unstructured":"Fu K, Jin J, Cui R, Sha F, Zhang C (2017) Aligning where to see and what to tell: image captioning with region-based attention and scene-specific contexts. IEEE Trans Pattern Anal Mach Intell 39(12):2321\u20132334","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"12291_CR13","first-page":"529","volume-title":"European conference on computer vision","author":"Y Gong","year":"2014","unstructured":"Gong Y, Wang L, Hodosh M, Hockenmaier J, Lazebnik S (2014) Improving image-sentence embeddings using large, weakly annotated photo collections. In: European conference on computer vision. Springer, Cham, pp 529\u2013545"},{"issue":"24","key":"12291_CR14","doi-asserted-by":"publisher","first-page":"17899","DOI":"10.1007\/s00521-019-04515-z","volume":"32","author":"N Gupta","year":"2020","unstructured":"Gupta N, Jalal AS (2020) Integration of textual cues for fine-grained image captioning using deep CNN and LSTM. Neural Comput & Applic 32(24):17899\u201317908","journal-title":"Neural Comput & Applic"},{"issue":"4","key":"12291_CR15","doi-asserted-by":"publisher","first-page":"218","DOI":"10.1080\/02643944.2016.1225315","volume":"34","author":"R Hibbin","year":"2016","unstructured":"Hibbin R (2016) The psychosocial benefits of oral storytelling in school: developing identity and empathy through narrative. Pastor Care Educ 34(4):218\u2013231","journal-title":"Pastor Care Educ"},{"key":"12291_CR16","doi-asserted-by":"publisher","first-page":"853","DOI":"10.1613\/jair.3994","volume":"47","author":"M Hodosh","year":"2013","unstructured":"Hodosh M, Young P, Hockenmaier J (2013) Framing image description as a ranking task: data, models, and evaluation metrics. J Artif Intell Res 47:853\u2013899","journal-title":"J Artif Intell Res"},{"key":"12291_CR17","doi-asserted-by":"crossref","unstructured":"Hossain M, Sohel F, Shiratuddin MF, Laga H (2018) A comprehensive study of deep learning for image captioning. arXiv preprint arXiv:1810.04020","DOI":"10.1145\/3295748"},{"key":"12291_CR18","series-title":"Play in clinical practice: evidence-based approaches","volume-title":"Play and interpersonal processes","author":"JF Jent","year":"2011","unstructured":"Jent JF, Niec LN, Baker SE (2011) Play and interpersonal processes, Play in clinical practice: evidence-based approaches. Guilford Press, New York"},{"key":"12291_CR19","doi-asserted-by":"crossref","unstructured":"Karpathy A, Fei-Fei L (2015). Deep visual-semantic alignments for generating image descriptions. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp 3128\u20133137","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"12291_CR20","doi-asserted-by":"publisher","first-page":"217","DOI":"10.1007\/978-981-16-0586-4_18","volume-title":"Proceedings of International Joint Conference on Advances in Computational Intelligence","author":"MF Khan","year":"2021","unstructured":"Khan MF, Sadiq-Ur-Rahman SM, Islam MS (2021) Improved Bengali image captioning via deep convolutional neural network based encoder-decoder model. In: Proceedings of International Joint Conference on Advances in Computational Intelligence. Springer, Singapore, pp 217\u2013229"},{"key":"12291_CR21","unstructured":"Khosla A, Jayadevaprakash N, Yao B, Li FF (2011) Novel dataset for fine-grained image categorization: Stanford dogs. In: Proc. CVPR Workshop on Fine-Grained Visual Categorization (FGVC), vol. 2, no. 1"},{"key":"12291_CR22","doi-asserted-by":"publisher","first-page":"416","DOI":"10.1016\/j.neucom.2017.07.014","volume":"272","author":"P Kinghorn","year":"2018","unstructured":"Kinghorn P, Zhang L, Shao L (2018) A region-based image caption generator with refined descriptions. Neurocomputing 272:416\u2013424","journal-title":"Neurocomputing"},{"key":"12291_CR23","unstructured":"Kiros R, Salakhutdinov R, Zemel RS (2014) Unifying visual-semantic embeddings with multimodal neural language models. arXiv preprint arXiv:1411.2539"},{"key":"12291_CR24","doi-asserted-by":"publisher","first-page":"351","DOI":"10.1162\/tacl_a_00188","volume":"2","author":"P Kuznetsova","year":"2014","unstructured":"Kuznetsova P, Ordonez V, Berg TL, Choi Y (2014) Treetalk: composition and compression of trees for image descriptions. Trans Assoc Computat Linguist 2:351\u2013362","journal-title":"Trans Assoc Computat Linguist"},{"key":"12291_CR25","doi-asserted-by":"publisher","first-page":"5858","DOI":"10.1109\/ACCESS.2017.2696121","volume":"5","author":"J Lemley","year":"2017","unstructured":"Lemley J, Bazrafkan S, Corcoran P (2017) Smart augmentation learning an optimal data augmentation strategy. IEEE Access 5:5858\u20135869","journal-title":"IEEE Access"},{"key":"12291_CR26","doi-asserted-by":"publisher","first-page":"726","DOI":"10.1109\/TMM.2017.2751140","volume":"20","author":"L Li","year":"2018","unstructured":"Li L, Tang S, Zhang Y, Deng L, Tian Q (2018) GLA: global-local attention for image description. IEEE Trans Multimed 20:726\u2013737","journal-title":"IEEE Trans Multimed"},{"key":"12291_CR27","unstructured":"Lin CY (2004) Rouge: a package for automatic evaluation of summaries. Text Summarization Branches Out"},{"key":"12291_CR28","unstructured":"Mao J, Xu W, Yang Y, Wang J, Huang Z, Yuille A (2014) Deep captioning with multimodal recurrent neural networks (m-rnn). arXiv preprint arXiv:1412.6632"},{"key":"12291_CR29","doi-asserted-by":"publisher","first-page":"24429","DOI":"10.1007\/s11042-020-09128-6","volume":"79","author":"N Naqvi","year":"2020","unstructured":"Naqvi N, Ye Z (2020) Image captions: global-local and joint signals attention model (GL-JSAM). Multimed Tools Appl 79:24429\u201324448. https:\/\/doi.org\/10.1007\/s11042-020-09128-6","journal-title":"Multimed Tools Appl"},{"key":"12291_CR30","doi-asserted-by":"crossref","unstructured":"Papineni K, Roukos S, Ward T, Zhu WJ (2002) BLEU: a method for automatic evaluation of machine translation. In: Proceedings of the 40th annual meeting on association for computational linguistics. Association for Computational Linguistics, pp 311\u2013318","DOI":"10.3115\/1073083.1073135"},{"key":"12291_CR31","unstructured":"Perry BD, Szalavitz M (2010) Born for love: why empathy is essential\u2014and endangered. HarperCollins e-Books"},{"key":"12291_CR32","doi-asserted-by":"publisher","unstructured":"Minoofam SAH, Bastanfard A, Keyvanpour MR (2021) TRCLA: a transfer learning approach to reduce negative transfer for cellular learning automata. In: IEEE transactions on neural networks and learning systems. IEEE. https:\/\/doi.org\/10.1109\/TNNLS.2021.3106705","DOI":"10.1109\/TNNLS.2021.3106705"},{"key":"12291_CR33","doi-asserted-by":"crossref","unstructured":"Shah P, Bakrola V, Pati S (2017) Image captioning using deep neural architectures. In: 2017 International Conference on Innovations in Information, Embedded and Communication Systems (ICIIECS). IEEE, pp 1\u20134","DOI":"10.1109\/ICIIECS.2017.8276124"},{"key":"12291_CR34","unstructured":"Soh M (2016) Learning CNN-LSTM architectures for image caption generation. Dept. Comput. Sci., Stanford Univ., Stanford, CA, USA, Tech. Rep"},{"key":"12291_CR35","doi-asserted-by":"crossref","unstructured":"Sun C, Gan C, Nevatia R (2015) Automatic concept discovery from parallel text and visual corpora. In: Proceedings of the IEEE international conference on computer vision. pp 2596\u20132604","DOI":"10.1109\/ICCV.2015.298"},{"issue":"4","key":"12291_CR36","doi-asserted-by":"publisher","first-page":"497","DOI":"10.1080\/02673843.2016.1267022","volume":"22","author":"E Venter","year":"2017","unstructured":"Venter E (2017) Bridging the communication gap between Generation Y and the Baby Boomer generation. Int J Adolesc Youth 22(4):497\u2013507. https:\/\/doi.org\/10.1080\/02673843.2016.1267022","journal-title":"Int J Adolesc Youth"},{"key":"12291_CR37","doi-asserted-by":"crossref","unstructured":"Vinyals O, Toshev A, Bengio S, Erhan D (2015) Show and tell: a neural image caption generator. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp 3156\u20133164","DOI":"10.1109\/CVPR.2015.7298935"},{"issue":"2s","key":"12291_CR38","first-page":"40","volume":"14","author":"C Wang","year":"2018","unstructured":"Wang C, Yang H, Meinel C (2018) Image captioning with deep bidirectional lstms and multi-task learning. ACM Trans Multimed Comput Commun Appl 14(2s):40","journal-title":"ACM Trans Multimed Comput Commun Appl"},{"issue":"1","key":"12291_CR39","first-page":"19","volume":"29","author":"J Warin","year":"2011","unstructured":"Warin J (2011) Stories of self: tracking children's identity and wellbeing through the years of school. Educ Health 29(1):19\u201320","journal-title":"Educ Health"},{"issue":"6","key":"12291_CR40","doi-asserted-by":"publisher","first-page":"1367","DOI":"10.1109\/TPAMI.2017.2708709","volume":"40","author":"Q Wu","year":"2018","unstructured":"Wu Q, Shen C, Wang P, Dick A, van den Hengel A (2018) Image captioning and visual question answering based on attributes and external knowledge. IEEE Trans Pattern Anal Mach Intell 40(6):1367\u20131381","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"12291_CR41","unstructured":"Xu K, Ba J, Kiros R, Cho K, Courville A, Salakhudinov R, Zemel RS, Bengio Y (2015) Show, attend and tell: Neural image caption generation with visual attention. In: International conference on machine learning. pp 2048\u20132057"},{"key":"12291_CR42","doi-asserted-by":"crossref","unstructured":"Yao T, Pan Y, Li Y, Qiu Z, Mei T (2017) Boosting image captioning with attributes. In Proceedings of the IEEE International Conference on Computer Vision. pp 4894\u20134902","DOI":"10.1109\/ICCV.2017.524"},{"key":"12291_CR43","doi-asserted-by":"publisher","first-page":"25557","DOI":"10.1007\/s11042-021-10632-6","volume":"80","author":"Z Ye","year":"2021","unstructured":"Ye Z, Khan R, Naqvi N, Islam MS (2021) A novel automatic image caption generation using bidirectional long-short term memory framework. Multimed Tools Appl 80:25557\u201325582. https:\/\/doi.org\/10.1007\/s11042-021-10632-6","journal-title":"Multimed Tools Appl"},{"key":"12291_CR44","doi-asserted-by":"crossref","unstructured":"Yu F, Ip HH (2006) Automatic semantic annotation of images using spatial hidden Markov model. In: 2006 IEEE International Conference on Multimedia and Expo. IEEE pp 305\u2013308","DOI":"10.1109\/ICME.2006.262459"},{"key":"12291_CR45","doi-asserted-by":"publisher","first-page":"476","DOI":"10.1016\/j.neucom.2018.11.004","volume":"329","author":"D Zhao","year":"2019","unstructured":"Zhao D, Chang Z, Guo S (2019) A multimodal fusion approach for image captioning. Neurocomputing 329:476\u2013485","journal-title":"Neurocomputing"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-022-12291-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-022-12291-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-022-12291-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,17]],"date-time":"2022-08-17T05:24:29Z","timestamp":1660713869000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-022-12291-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,4,5]]},"references-count":45,"journal-issue":{"issue":"21","published-print":{"date-parts":[[2022,9]]}},"alternative-id":["12291"],"URL":"https:\/\/doi.org\/10.1007\/s11042-022-12291-7","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"value":"1380-7501","type":"print"},{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,4,5]]},"assertion":[{"value":"21 January 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 January 2022","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 January 2022","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 April 2022","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}