{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T15:22:44Z","timestamp":1772119364989,"version":"3.50.1"},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2022,1,20]],"date-time":"2022-01-20T00:00:00Z","timestamp":1642636800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,20]],"date-time":"2022-01-20T00:00:00Z","timestamp":1642636800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"name":"the Natural Science Foundation of Shandong Province","award":["ZR2018BF001"],"award-info":[{"award-number":["ZR2018BF001"]}]},{"DOI":"10.13039\/501100002858","name":"the China Postdoctoral Science Foundation","doi-asserted-by":"crossref","award":["2019M652433"],"award-info":[{"award-number":["2019M652433"]}],"id":[{"id":"10.13039\/501100002858","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Soft Comput"],"published-print":{"date-parts":[[2022,2]]},"DOI":"10.1007\/s00500-021-06622-3","type":"journal-article","created":{"date-parts":[[2022,1,19]],"date-time":"2022-01-19T19:03:46Z","timestamp":1642619026000},"page":"1501-1507","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["RNIC-A retrospect network for image captioning"],"prefix":"10.1007","volume":"26","author":[{"given":"Xiu-Long","family":"Yi","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rong","family":"Hua","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"You","family":"Fu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Du-Lei","family":"Zheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhi-Yu","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,1,20]]},"reference":[{"key":"6622_CR1","doi-asserted-by":"crossref","unstructured":"Anderson P, He X, Buehler C, et al. Bottom-up and top-down attention for image captioning and visual question answering[C] Proceedings of the IEEE conference on computer vision and pattern recognition. 2018: 6077-6086","DOI":"10.1109\/CVPR.2018.00636"},{"key":"6622_CR2","unstructured":"Bahdanau D, Cho K, Bengio Y. Neural machine translation by jointly learning to align and translate[J]. arXiv preprint arXiv:1409.0473, 2014"},{"key":"6622_CR3","unstructured":"Banerjee S, Lavie A. METEOR: An automatic metric for MT evaluation with improved correlation with human judgments[C] Proceedings of the acl workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization. 2005: 65-72"},{"key":"6622_CR4","unstructured":"Chen X, Fang H, Lin T Y, et al. Microsoft coco captions: Data collection and evaluation server[J]. arXiv preprint arXiv:1504.00325, 2015"},{"key":"6622_CR5","doi-asserted-by":"publisher","first-page":"154953","DOI":"10.1109\/ACCESS.2020.3018752","volume":"8","author":"L Cheng","year":"2020","unstructured":"Cheng L, Wei W, Mao X et al (2020) Stack-VS: stacked visual-semantic attention for image caption generation[J]. IEEE Access 8:154953\u2013154965","journal-title":"IEEE Access"},{"key":"6622_CR6","doi-asserted-by":"crossref","unstructured":"Chen L, Zhang H, Xiao J, et al. Sca-cnn: Spatial and channel-wise attention in convolutional networks for image captioning[C] Proceedings of the IEEE conference on computer vision and pattern recognition. 2017: 5659-5667","DOI":"10.1109\/CVPR.2017.667"},{"key":"6622_CR7","doi-asserted-by":"crossref","unstructured":"Cornia M, Stefanini M, Baraldi L, et al. Meshed-memory transformer for image captioning[C] Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2020: 10578-10587","DOI":"10.1109\/CVPR42600.2020.01059"},{"key":"6622_CR8","doi-asserted-by":"crossref","unstructured":"Deng J, Dong W, Socher R, et al. Imagenet: A large-scale hierarchical image database[C] 2009 IEEE conference on computer vision and pattern recognition. Ieee, 2009: 248\u2013255","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"6622_CR9","doi-asserted-by":"crossref","unstructured":"Fan Z, Dan T, Yu H, et al. Single Fundus Image Super-Resolution Via Cascaded Channel-Wise Attention Network[C] 2020 42nd Annual International Conference of the IEEE Engineering in Medicine & Biology Society (EMBC). IEEE, 2020: 1984-1987","DOI":"10.1109\/EMBC44109.2020.9176428"},{"key":"6622_CR10","unstructured":"Herdade S, Kappeler A, Boakye K, et al. Image captioning: Transforming objects into words[J]. arXiv preprint arXiv:1906.05963, 2019"},{"issue":"8","key":"6622_CR11","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter S, Schmidhuber J (1997) Long short-term memory[J]. Neural Comput 9(8):1735\u20131780","journal-title":"Neural Comput"},{"key":"6622_CR12","doi-asserted-by":"publisher","first-page":"4013","DOI":"10.1109\/TIP.2020.2969330","volume":"29","author":"Y Huang","year":"2020","unstructured":"Huang Y, Chen J, Ouyang W et al (2020) Image captioning with end-to-end attribute detection and subsequent attributes prediction[J]. IEEE Trans Image Process 29:4013\u20134026","journal-title":"IEEE Trans Image Process"},{"key":"6622_CR13","doi-asserted-by":"publisher","first-page":"340","DOI":"10.1016\/j.neucom.2020.04.120","volume":"404","author":"Q Huang","year":"2020","unstructured":"Huang Q, Zhang Y, Peng H et al (2020) Deep subspace clustering to achieve jointly latent feature extraction and discriminative learning[J]. Neurocomputing 404:340\u2013350","journal-title":"Neurocomputing"},{"key":"6622_CR14","doi-asserted-by":"crossref","unstructured":"Huang L, Wang W, Chen J, et al. Attention on attention for image captioning[C] Proceedings of the IEEE\/CVF International Conference on Computer Vision. 2019: 4634-4643","DOI":"10.1109\/ICCV.2019.00473"},{"key":"6622_CR15","doi-asserted-by":"crossref","unstructured":"Huang J, Zhuo E, Li H, et al. Achieving accurate segmentation of nasopharyngeal carcinoma in mr images through recurrent attention[C] International Conference on Medical Image Computing and Computer-Assisted Intervention. Springer, Cham, 2019: 494-502","DOI":"10.1007\/978-3-030-32254-0_55"},{"key":"6622_CR16","doi-asserted-by":"crossref","unstructured":"Ji J, Luo Y, Sun X, et al. Improving image captioning by leveraging intra-and inter-layer global representation in transformer network[C] Proceedings of the AAAI Conference on Artificial Intelligence. 2021, 35(2): 1655-1663","DOI":"10.1609\/aaai.v35i2.16258"},{"key":"6622_CR17","doi-asserted-by":"crossref","unstructured":"Karpathy A, Fei-Fei L. Deep visual-semantic alignments for generating image descriptions[C] Proceedings of the IEEE conference on computer vision and pattern recognition. 2015: 3128-3137","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"6622_CR18","doi-asserted-by":"crossref","unstructured":"Ke L, Pei W, Li R, et al. Reflective decoding network for image captioning[C] Proceedings of the IEEE\/CVF International Conference on Computer Vision. 2019: 8888-8897","DOI":"10.1109\/ICCV.2019.00898"},{"key":"6622_CR19","unstructured":"Krishna R, Zhu Y, Groth O, et al. Visual genome: Connecting language and vision using crowdsourced dense image annotations[J]. arXiv preprint arXiv:1602.07332, 2016"},{"key":"6622_CR20","unstructured":"Lin C Y. Rouge: A package for automatic evaluation of summaries[C] Text summarization branches out. 2004: 74-81"},{"issue":"1","key":"6622_CR21","doi-asserted-by":"publisher","first-page":"384","DOI":"10.1109\/TMECH.2018.2870056","volume":"24","author":"T Liu","year":"2018","unstructured":"Liu T, Liu H, Li Y et al (2018) Efficient blind signal reconstruction with wavelet transforms regularization for educational robot infrared vision sensing[J]. IEEE\/ASME Trans Mechatron 24(1):384\u2013394","journal-title":"IEEE\/ASME Trans Mechatron"},{"issue":"12","key":"6622_CR22","first-page":"5268","volume":"14","author":"T Liu","year":"2018","unstructured":"Liu T, Liu H, Chen Z et al (2018) Fast blind instrument function estimation method for industrial infrared spectrometers[J]. IEEE Trans Indus Inf 14(12):5268\u20135277","journal-title":"IEEE Trans Indus Inf"},{"issue":"1","key":"6622_CR23","doi-asserted-by":"publisher","first-page":"544","DOI":"10.1109\/TII.2019.2934728","volume":"16","author":"T Liu","year":"2019","unstructured":"Liu T, Liu H, Li YF et al (2019) Flexible FTIR spectral imaging enhancement for industrial robot infrared vision sensing[J]. IEEE Trans Indus Inf 16(1):544\u2013554","journal-title":"IEEE Trans Indus Inf"},{"key":"6622_CR24","doi-asserted-by":"publisher","first-page":"310","DOI":"10.1016\/j.neucom.2020.09.068","volume":"433","author":"H Liu","year":"2021","unstructured":"Liu H, Nie H, Zhang Z et al (2021) Anisotropic angle distribution learning for head pose estimation and attention understanding in human-computer interaction[J]. Neurocomputing 433:310\u2013322","journal-title":"Neurocomputing"},{"key":"6622_CR25","doi-asserted-by":"crossref","unstructured":"Liu H, Fang S, Zhang Z, et al. MFDNet: Collaborative Poses Perception and Matrix Fisher Distribution for Head Pose Estimation[J]. IEEE Transactions on Multimedia, 2021","DOI":"10.1109\/TMM.2021.3081873"},{"key":"6622_CR26","doi-asserted-by":"crossref","unstructured":"Li G, Zhu L, Liu P, et al. Entangled transformer for image captioning[C] Proceedings of the IEEE\/CVF International Conference on Computer Vision. 2019: 8928-8937","DOI":"10.1109\/ICCV.2019.00902"},{"key":"6622_CR27","first-page":"289","volume":"29","author":"J Lu","year":"2016","unstructured":"Lu J, Yang J, Batra D et al (2016) Hierarchical question-image co-attention for visual question answering[J]. Adv Neu Inf Process Syst 29:289\u2013297","journal-title":"Adv Neu Inf Process Syst"},{"key":"6622_CR28","doi-asserted-by":"crossref","unstructured":"Lu J, Xiong C, Parikh D, et al. Knowing when to look: Adaptive attention via a visual sentinel for image captioning[C] Proceedings of the IEEE conference on computer vision and pattern recognition. 2017: 375-383","DOI":"10.1109\/CVPR.2017.345"},{"key":"6622_CR29","doi-asserted-by":"crossref","unstructured":"Nam H, Ha J W, Kim J. Dual attention networks for multimodal reasoning and matching[C] Proceedings of the IEEE conference on computer vision and pattern recognition. 2017: 299-307","DOI":"10.1109\/CVPR.2017.232"},{"key":"6622_CR30","doi-asserted-by":"crossref","unstructured":"Pan Y, Yao T, Li Y, et al. X-linear attention networks for image captioning[C] Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2020: 10971-10980","DOI":"10.1109\/CVPR42600.2020.01098"},{"key":"6622_CR31","doi-asserted-by":"crossref","unstructured":"Papineni K, Roukos S, Ward T, et al. Bleu: a method for automatic evaluation of machine translation[C] Proceedings of the 40th annual meeting of the Association for Computational Linguistics. 2002: 311-318","DOI":"10.3115\/1073083.1073135"},{"key":"6622_CR32","doi-asserted-by":"crossref","unstructured":"Qin Y, Du J, Zhang Y, et al. Look back and predict forward in image captioning[C] Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2019: 8367-8375","DOI":"10.1109\/CVPR.2019.00856"},{"key":"6622_CR33","first-page":"91","volume":"28","author":"S Ren","year":"2015","unstructured":"Ren S, He K, Girshick R et al (2015) Faster r-cnn: Towards real-time object detection with region proposal networks[J]. Adv Neu Inf Process Syst 28:91\u201399","journal-title":"Adv Neu Inf Process Syst"},{"key":"6622_CR34","doi-asserted-by":"crossref","unstructured":"Rennie S J, Marcheret E, Mroueh Y, et al. Self-critical sequence training for image captioning[C] Proceedings of the IEEE conference on computer vision and pattern recognition. 2017: 7008-7024","DOI":"10.1109\/CVPR.2017.131"},{"key":"6622_CR35","unstructured":"Rockt\u00e4schel T, Grefenstette E, Hermann K M, et al. Reasoning about entailment with neural attention[J]. arXiv preprint arXiv:1509.06664, 2015"},{"key":"6622_CR36","doi-asserted-by":"crossref","unstructured":"Vedantam R, Lawrence Zitnick C, Parikh D. Cider: Consensus-based image description evaluation[C] Proceedings of the IEEE conference on computer vision and pattern recognition. 2015: 4566-4575","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"6622_CR37","unstructured":"Xu K, Ba J, Kiros R, et al. Show, attend and tell: Neural image caption generation with visual attention[C] International conference on machine learning. PMLR, 2015: 2048-2057"},{"key":"6622_CR38","doi-asserted-by":"crossref","unstructured":"Yang X, Tang K, Zhang H, et al. Auto-encoding scene graphs for image captioning[C] Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2019: 10685-10694","DOI":"10.1109\/CVPR.2019.01094"},{"key":"6622_CR39","doi-asserted-by":"crossref","unstructured":"Yao T, Pan Y, Li Y, et al. Exploring visual relationship for image captioning[C] Proceedings of the European conference on computer vision (ECCV). 2018: 684-699","DOI":"10.1007\/978-3-030-01264-9_42"},{"key":"6622_CR40","doi-asserted-by":"publisher","first-page":"259","DOI":"10.1162\/tacl_a_00097","volume":"4","author":"W Yin","year":"2016","unstructured":"Yin W, Sch\u00fctze H, Xiang B et al (2016) Abcnn: Attention-based convolutional neural network for modeling sentence pairs[J]. Trans Assoc Comput Linguist 4:259\u2013272","journal-title":"Trans Assoc Comput Linguist"}],"container-title":["Soft Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00500-021-06622-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00500-021-06622-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00500-021-06622-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,23]],"date-time":"2023-01-23T15:27:30Z","timestamp":1674487650000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00500-021-06622-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,1,20]]},"references-count":40,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2022,2]]}},"alternative-id":["6622"],"URL":"https:\/\/doi.org\/10.1007\/s00500-021-06622-3","relation":{"has-preprint":[{"id-type":"doi","id":"10.21203\/rs.3.rs-985124\/v1","asserted-by":"object"}]},"ISSN":["1432-7643","1433-7479"],"issn-type":[{"value":"1432-7643","type":"print"},{"value":"1433-7479","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,1,20]]},"assertion":[{"value":"29 November 2021","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 January 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Authors XIU-LONG YI, RONG HUA ,YOU FU, DU-LEI ZHENG, and Zhi-Yu Wang declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"This study was funded by National key research and development project(2017YFB0202002)","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Funding"}},{"value":"This article does not contain any studies with human participants or animals performed by any of the authors.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}},{"value":"Informed consent was obtained from all individual participants included in the study.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Informed consent"}}]}}