{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,19]],"date-time":"2026-01-19T06:31:18Z","timestamp":1768804278979,"version":"3.49.0"},"reference-count":21,"publisher":"Springer Science and Business Media LLC","issue":"11","license":[{"start":{"date-parts":[[2024,8,13]],"date-time":"2024-08-13T00:00:00Z","timestamp":1723507200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,8,13]],"date-time":"2024-08-13T00:00:00Z","timestamp":1723507200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Computing"],"published-print":{"date-parts":[[2024,11]]},"DOI":"10.1007\/s00607-024-01334-6","type":"journal-article","created":{"date-parts":[[2024,8,13]],"date-time":"2024-08-13T17:02:21Z","timestamp":1723568541000},"page":"3691-3709","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":28,"title":["Automatic video captioning using tree hierarchical deep convolutional neural network\u00a0and ASRNN-bi-directional LSTM"],"prefix":"10.1007","volume":"106","author":[{"given":"N.","family":"Kavitha","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"K. Ruba","family":"Soundar","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"R.","family":"Karthick","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"J.","family":"Kohila","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,8,13]]},"reference":[{"key":"1334_CR1","doi-asserted-by":"publisher","first-page":"347","DOI":"10.1016\/j.neucom.2020.08.035","volume":"417","author":"X Shi","year":"2020","unstructured":"Shi X, Cai J, Gu J, Joty S (2020) Video captioning with boundary-aware hierarchical language decoding and joint video prediction. Neurocomputing 417:347\u2013356","journal-title":"Neurocomputing"},{"issue":"5","key":"1334_CR2","doi-asserted-by":"publisher","first-page":"1372","DOI":"10.1109\/TMM.2019.2941820","volume":"22","author":"N Xu","year":"2019","unstructured":"Xu N, Zhang H, Liu AA, Nie W, Su Y, Nie J, Zhang Y (2019) Multi-level policy and reward-based deep reinforcement learning framework for image captioning. IEEE Trans Multimedia 22(5):1372\u20131383","journal-title":"IEEE Trans Multimedia"},{"key":"1334_CR3","doi-asserted-by":"publisher","first-page":"101524","DOI":"10.1016\/j.compenvurbsys.2020.101524","volume":"84","author":"AR Bahrehdar","year":"2020","unstructured":"Bahrehdar AR, Adams B, Purves RS (2020) Streets of London: Using Flickr and Open Street Map to build an interactive image of the city. Comput Environ Urban Syst 84:101524","journal-title":"Comput Environ Urban Syst"},{"issue":"4","key":"1334_CR4","doi-asserted-by":"publisher","first-page":"1245","DOI":"10.1016\/j.ipm.2019.02.018","volume":"56","author":"A Abdi","year":"2019","unstructured":"Abdi A, Shamsuddin SM, Hasan S, Piran J (2019) Deep learning-based sentiment classification of evaluative text based on Multi-feature fusion. Inf Process Manage 56(4):1245\u20131259","journal-title":"Inf Process Manage"},{"issue":"02","key":"1334_CR5","doi-asserted-by":"publisher","first-page":"2354001","DOI":"10.1142\/S0218001423540010","volume":"37","author":"J Jasper GnanaChandran","year":"2023","unstructured":"Jasper GnanaChandran J, Karthick R, Rajagopal R, Meenalochini P (2023) Dual-channel capsule generative adversarial network optimized with golden eagle optimization for pediatric bone age assessment from hand X-Ray image. Int J Pattern Recognit Artif Intell 37(02):2354001","journal-title":"Int J Pattern Recognit Artif Intell"},{"key":"1334_CR6","doi-asserted-by":"crossref","unstructured":"Karthick S, Gomathi N (2024) IoT-based COVID-19 detection using recalling-enhanced recurrent neural network\noptimized with golden eagle optimization algorithm. Medical  Biol Eng Comput 62(3):925\u2013940","DOI":"10.1007\/s11517-023-02973-1"},{"key":"1334_CR7","doi-asserted-by":"publisher","first-page":"100052","DOI":"10.1016\/j.array.2020.100052","volume":"9","author":"N Aafaq","year":"2021","unstructured":"Aafaq N, Akhtar N, Liu W, Mian A (2021) Empirical autopsy of deep video captioning encoder-decoder architecture. Array 9:100052","journal-title":"Array"},{"key":"1334_CR8","doi-asserted-by":"publisher","first-page":"148","DOI":"10.1016\/j.neunet.2019.09.010","volume":"121","author":"D Roy","year":"2020","unstructured":"Roy D, Panda P, Roy K (2020) Tree-CNN: a hierarchical deep convolutional neural network for incremental learning. Neural Netw 121:148\u2013160","journal-title":"Neural Netw"},{"key":"1334_CR9","doi-asserted-by":"publisher","first-page":"106548","DOI":"10.1016\/j.knosys.2020.106548","volume":"212","author":"JC Lin","year":"2021","unstructured":"Lin JC, Shao Y, Djenouri Y, Yun U (2021) ASRNN: a recurrent neural network with an attention model for sequence labeling. Knowl-Based Syst 212:106548","journal-title":"Knowl-Based Syst"},{"key":"1334_CR10","doi-asserted-by":"publisher","DOI":"10.1016\/j.cie.2020.107050","volume":"152","author":"A Mohammadi-Balani","year":"2021","unstructured":"Mohammadi-Balani A, Nayeri MD, Azar A, Taghizadeh-Yazdi M (2021) Golden eagle optimizer: A nature-inspired metaheuristic algorithm. Comput Ind Eng 152:107050","journal-title":"Comput Ind Eng"},{"key":"1334_CR11","unstructured":"https:\/\/github.com\/jssprz\/video_captioning_datasets"},{"key":"1334_CR12","doi-asserted-by":"crossref","unstructured":"Om Prakash S, Udhayakumar S, Anjum Khan R, Priyadarshan R (2021) Video captioning for proactive video management using deep machine learning. In: Advances in smart system technologies: Select proceedings of ICFSST 2019, Springer Singapore, pp 801\u2013811","DOI":"10.1007\/978-981-15-5029-4_69"},{"issue":"2","key":"1334_CR13","doi-asserted-by":"publisher","first-page":"880","DOI":"10.1109\/TCSVT.2021.3063423","volume":"32","author":"J Deng","year":"2021","unstructured":"Deng J, Li L, Zhang B, Wang S, Zha Z, Huang Q (2021) Syntax-guided hierarchical attention network for video captioning. IEEE Trans Circuits Syst Video Technol 32(2):880\u2013892","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"issue":"11","key":"1334_CR14","doi-asserted-by":"publisher","first-page":"5552","DOI":"10.1109\/TIP.2019.2916757","volume":"28","author":"B Zhao","year":"2019","unstructured":"Zhao B, Li X, Lu X (2019) CAM-RNN: Co-attention model based RNN for video captioning. IEEE Trans Image Process 28(11):5552\u20135565","journal-title":"IEEE Trans Image Process"},{"issue":"2","key":"1334_CR15","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s42979-021-00487-x","volume":"2","author":"S Islam","year":"2021","unstructured":"Islam S, Dash A, Seum A, Raj AH, Hossain T, Shah FM (2021) Exploring video captioning techniques: a comprehensive survey on deep learning methods. SN Computer Science 2(2):1\u201328","journal-title":"SN Computer Science"},{"key":"1334_CR16","doi-asserted-by":"crossref","unstructured":"Zheng Z, Wang W, Qi S, Zhu SC (2019) Reasoning visual dialogs with structural and partial observations. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 6669\u20136678","DOI":"10.1109\/CVPR.2019.00683"},{"key":"1334_CR17","doi-asserted-by":"crossref","unstructured":"Zellers R, Bisk Y, Farhadi A, Choi Y (2019) From recognition to cognition: visual commonsense reasoning. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition 2019, pp 6720\u20136731","DOI":"10.1109\/CVPR.2019.00688"},{"key":"1334_CR18","doi-asserted-by":"crossref","unstructured":"Alkalouti HN, Masre MA (2021) Encoder-decoder model for automatic video captioning using yolo algorithm. In: 2021 IEEE International IOT, electronics and mechatronics conference (IEMTRONICS), pp 1\u20134. IEEE.","DOI":"10.1109\/IEMTRONICS52119.2021.9422600"},{"key":"1334_CR19","doi-asserted-by":"publisher","first-page":"222","DOI":"10.1016\/j.neucom.2018.06.096","volume":"395","author":"L Gao","year":"2020","unstructured":"Gao L, Wang X, Song J, Liu Y (2020) Fused GRU with semantic-temporal attention for video captioning. Neurocomputing 395:222\u2013228","journal-title":"Neurocomputing"},{"key":"1334_CR20","doi-asserted-by":"publisher","first-page":"144507","DOI":"10.1016\/j.scitotenv.2020.144507","volume":"765","author":"B Zhang","year":"2021","unstructured":"Zhang B, Zou G, Qin D, Lu Y, Jin Y, Wang H (2021) A novel Encoder-Decoder model based on read-first LSTM for air pollutant prediction. Sci Total Environ 765:144507","journal-title":"Sci Total Environ"},{"issue":"3","key":"1334_CR21","doi-asserted-by":"publisher","first-page":"8","DOI":"10.4236\/jcc.2019.73002","volume":"7","author":"U Sara","year":"2019","unstructured":"Sara U, Akter M, Uddin MS (2019) Image quality assessment through FSIM, SSIM, MSE and PSNR\u2014a comparative study. J Comput Commun 7(3):8\u201318","journal-title":"J Comput Commun"}],"updated-by":[{"DOI":"10.1007\/s00607-025-01446-7","type":"correction","label":"Correction","source":"publisher","updated":{"date-parts":[[2025,3,13]],"date-time":"2025-03-13T00:00:00Z","timestamp":1741824000000}}],"container-title":["Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00607-024-01334-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00607-024-01334-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00607-024-01334-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,13]],"date-time":"2025-03-13T05:33:01Z","timestamp":1741843981000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00607-024-01334-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,13]]},"references-count":21,"journal-issue":{"issue":"11","published-print":{"date-parts":[[2024,11]]}},"alternative-id":["1334"],"URL":"https:\/\/doi.org\/10.1007\/s00607-024-01334-6","relation":{"correction":[{"id-type":"doi","id":"10.1007\/s00607-025-01446-7","asserted-by":"object"}]},"ISSN":["0010-485X","1436-5057"],"issn-type":[{"value":"0010-485X","type":"print"},{"value":"1436-5057","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,8,13]]},"assertion":[{"value":"20 September 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 July 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 August 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 March 2025","order":4,"name":"change_date","label":"Change Date","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"Update","order":5,"name":"change_type","label":"Change Type","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The original online version of this article was revised to update the corresponding author\u2019s affiliation. This has been corrected now.","order":6,"name":"change_details","label":"Change Details","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 March 2025","order":7,"name":"change_date","label":"Change Date","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"Correction","order":8,"name":"change_type","label":"Change Type","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"A Correction to this paper has been published:","order":9,"name":"change_details","label":"Change Details","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"https:\/\/doi.org\/10.1007\/s00607-025-01446-7","URL":"https:\/\/doi.org\/10.1007\/s00607-025-01446-7","order":10,"name":"change_details","label":"Change Details","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interests"}},{"value":"This article does not contain any studies with human participants performed by any of the authors.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to participate"}},{"value":"Not Applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"Not Applicable.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Human and animal ethics"}}]}}