{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T14:19:21Z","timestamp":1760710761997,"version":"3.37.3"},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2021,11,13]],"date-time":"2021-11-13T00:00:00Z","timestamp":1636761600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,11,13]],"date-time":"2021-11-13T00:00:00Z","timestamp":1636761600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100004607","name":"Natural Science Foundation of Guangxi Province","doi-asserted-by":"publisher","award":["2018GXNSFDA281019"],"award-info":[{"award-number":["2018GXNSFDA281019"]}],"id":[{"id":"10.13039\/501100004607","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61966005"],"award-info":[{"award-number":["61966005"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2022,6]]},"DOI":"10.1007\/s10489-021-02943-w","type":"journal-article","created":{"date-parts":[[2021,11,13]],"date-time":"2021-11-13T01:02:35Z","timestamp":1636765355000},"page":"9017-9032","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Collaborative strategy network for spatial attention image 
captioning"],"prefix":"10.1007","volume":"52","author":[{"given":"Dongming","family":"Zhou","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7422-7297","authenticated-orcid":false,"given":"Jing","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Riqiang","family":"Bao","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,11,13]]},"reference":[{"key":"2943_CR1","doi-asserted-by":"crossref","unstructured":"Anderson P, He X, Buehler C, Teney D, Johnson M, Gould S, Zhang L (2018) Bottom-up and top-down attention for image captioning and visual question answering. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 6077\u20136086","DOI":"10.1109\/CVPR.2018.00636"},{"issue":"114","key":"2943_CR2","first-page":"431","volume":"169","author":"KK Babu","year":"2021","unstructured":"Babu KK, Dubey SR (2021) Csgan: Cyclic-synthesized generative adversarial networks for image-to-image transformation. Expert Syst Appl 169(114):431","journal-title":"Expert Syst Appl"},{"key":"2943_CR3","doi-asserted-by":"crossref","unstructured":"Bodapati JD (2021) Sae-pd-seq: sequence autoencoder-based pre-training of decoder for sequence learning tasks. SIViP, pp 1\u20137","DOI":"10.1007\/s11760-021-01877-7"},{"key":"2943_CR4","doi-asserted-by":"crossref","unstructured":"Cao T, Han K, Wang X, Ma L, Fu Y, Jiang YG, Xue X (2020) Feature deformation meta-networks in image captioning of novel objects. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 34, pp. 
10,494\u201310,501","DOI":"10.1609\/aaai.v34i07.6620"},{"issue":"41","key":"2943_CR5","doi-asserted-by":"publisher","first-page":"30,615","DOI":"10.1007\/s11042-020-09539-5","volume":"79","author":"T do Carmo Nogueira","year":"2020","unstructured":"do Carmo Nogueira T, Vinhal CDN, da Cruz J\u00fanior G, Ullmann MRD (2020) Reference-based model using multimodal gated recurrent units for image captioning. Multimedia Tools and Applications 79 (41):30,615\u201330,635","journal-title":"Multimedia Tools and Applications"},{"key":"2943_CR6","doi-asserted-by":"crossref","unstructured":"Chen J, Jin Q (2020) Better captioning with sequence-level exploration. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10,890\u201310,899","DOI":"10.1109\/CVPR42600.2020.01090"},{"key":"2943_CR7","doi-asserted-by":"crossref","unstructured":"Chen S, Jin Q, Wang P, Wu Q (2020) Say as you wish: Fine-grained control of image caption generation with abstract scene graphs. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9962\u20139971","DOI":"10.1109\/CVPR42600.2020.00998"},{"key":"2943_CR8","doi-asserted-by":"crossref","unstructured":"Han HY, Chen YC, Hsiao PY, Fu LC (2020) Using channel-wise attention for deep cnn based real-time semantic segmentation with class-aware edge information IEEE Transactions on Intelligent Transportation Systems","DOI":"10.1109\/TITS.2019.2962094"},{"issue":"13","key":"2943_CR9","doi-asserted-by":"publisher","first-page":"3021","DOI":"10.1049\/iet-ipr.2019.1317","volume":"14","author":"J He","year":"2020","unstructured":"He J, Zhao Y, Sun B, Yu L (2020) Feedback evaluations to promote image captioning. 
IET Image Process 14(13):3021\u20133027","journal-title":"IET Image Process"},{"issue":"7","key":"2943_CR10","doi-asserted-by":"publisher","first-page":"941","DOI":"10.1587\/transinf.2020EDP7227","volume":"104","author":"S He","year":"2021","unstructured":"He S, Lu Y, Chen S (2021) Image captioning algorithm based on multi-branch cnn and bi-lstm. IEICE Trans Inf Syst 104(7):941\u2013947","journal-title":"IEICE Trans Inf Syst"},{"key":"2943_CR11","doi-asserted-by":"publisher","first-page":"3499","DOI":"10.1109\/TIP.2021.3061927","volume":"30","author":"T Hu","year":"2021","unstructured":"Hu T, Long C, Xiao C (2021) A novel visual representation on text using diverse conditional gan for visual recognition. IEEE Trans Image Process 30:3499\u20133512","journal-title":"IEEE Trans Image Process"},{"key":"2943_CR12","doi-asserted-by":"crossref","unstructured":"Huang F, Li X, Yuan C, Zhang S, Zhang J, Qiao S (2021) Attention-emotion-enhanced convolutional lstm for sentiment analysis IEEE Transactions on Neural Networks and Learning Systems","DOI":"10.1109\/TNNLS.2021.3056664"},{"key":"2943_CR13","doi-asserted-by":"publisher","first-page":"4013","DOI":"10.1109\/TIP.2020.2969330","volume":"29","author":"Y Huang","year":"2020","unstructured":"Huang Y, Chen J, Ouyang W, Wan W, Xue Y (2020) Image captioning with end-to-end attribute detection and subsequent attributes prediction. IEEE Trans Image Process 29:4013\u2013 4026","journal-title":"IEEE Trans Image Process"},{"issue":"107","key":"2943_CR14","first-page":"928","volume":"115","author":"J Ji","year":"2021","unstructured":"Ji J, Du Z, Zhang X (2021) Divergent-convergent attention for image captioning. Pattern Recogn 115(107):928","journal-title":"Pattern Recogn"},{"key":"2943_CR15","doi-asserted-by":"crossref","unstructured":"Li W, Wang Q, Wu J, Yu Z (2021) Piecewise convolutional neural networks with position attention and similar bag attention for distant supervision relation extraction. 
Appl Intell, pp 1\u201311","DOI":"10.1007\/s10489-021-02632-8"},{"key":"2943_CR16","doi-asserted-by":"publisher","first-page":"310","DOI":"10.1016\/j.neucom.2020.09.068","volume":"433","author":"H Liu","year":"2021","unstructured":"Liu H, Nie H, Zhang Z, Li YF (2021) Anisotropic angle distribution learning for head pose estimation and attention understanding in human-computer interaction. Neurocomputing 433:310\u2013322","journal-title":"Neurocomputing"},{"key":"2943_CR17","doi-asserted-by":"publisher","first-page":"2450","DOI":"10.1109\/TIP.2021.3051476","volume":"30","author":"H Liu","year":"2021","unstructured":"Liu H, Zhang S, Lin K, Wen J, Li J, Hu X (2021) Vocabulary-wide credit assignment for training image captioning models. IEEE Trans Image Process 30:2450\u20132460","journal-title":"IEEE Trans Image Process"},{"issue":"2","key":"2943_CR18","doi-asserted-by":"publisher","first-page":"102,178","DOI":"10.1016\/j.ipm.2019.102178","volume":"57","author":"M Liu","year":"2020","unstructured":"Liu M, Li L, Hu H, Guan W, Tian J (2020) Image caption generation with dual attention mechanism. Information Processing & Management 57(2):102,178","journal-title":"Information Processing & Management"},{"issue":"1s","key":"2943_CR19","first-page":"1","volume":"17","author":"H Lu","year":"2021","unstructured":"Lu H, Yang R, Deng Z, Zhang Y, Gao G, Lan R (2021) Chinese image captioning via fuzzy attention-based densenet-bilstm. ACM Transactions on Multimedia Computing. Communications, and Applications (TOMM) 17(1s):1\u201318","journal-title":"Communications, and Applications (TOMM)"},{"key":"2943_CR20","doi-asserted-by":"crossref","unstructured":"Rennie SJ, Marcheret E, Mroueh Y, Ross J, Goel V (2017) Self-critical sequence training for image captioning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 
7008\u20137024","DOI":"10.1109\/CVPR.2017.131"},{"key":"2943_CR21","doi-asserted-by":"publisher","first-page":"234","DOI":"10.1016\/j.procs.2020.06.028","volume":"173","author":"R Sharma","year":"2020","unstructured":"Sharma R, Kumar A, Meena D, Pushp S (2020) Employing differentiable neural computers for image captioning and neural machine translation. Procedia Computer Science 173:234\u2013 244","journal-title":"Procedia Computer Science"},{"issue":"1","key":"2943_CR22","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3419106","volume":"2","author":"T Shi","year":"2021","unstructured":"Shi T, Keneshloo Y, Ramakrishnan N, Reddy CK (2021) Neural abstractive text summarization with sequence-to-sequence models. ACM Transactions on Data Science 2(1):1\u201337","journal-title":"ACM Transactions on Data Science"},{"key":"2943_CR23","doi-asserted-by":"crossref","unstructured":"Sun B, Wu Y, Zhao K, He J, Yu L, Yan H, Luo A (2021) Student class behavior dataset: a video dataset for recognizing, detecting, and captioning students\u2019 behaviors in classroom scenes. Neural Comput & Applic, pp 1\u201320","DOI":"10.1007\/s00521-020-05587-y"},{"issue":"6","key":"2943_CR24","doi-asserted-by":"publisher","first-page":"3311","DOI":"10.1007\/s10489-020-01949-0","volume":"51","author":"C Sun","year":"2021","unstructured":"Sun C, Ai Y, Wang S, Zhang W (2021) Mask-guided ssd for small-object detection. Appl Intell 51(6):3311\u20133322","journal-title":"Appl Intell"},{"key":"2943_CR25","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1016\/j.neucom.2019.12.073","volume":"387","author":"Y Wei","year":"2020","unstructured":"Wei Y, Wang L, Cao H, Shao M, Wu C (2020) Multi-attention generative adversarial network for image captioning. 
Neurocomputing 387:91\u201399","journal-title":"Neurocomputing"},{"key":"2943_CR26","doi-asserted-by":"crossref","unstructured":"Yan S, Wu F, Smith JS, Lu W, Zhang B (2018) Image captioning using adversarial networks and reinforcement learning. In: 2018 24th International Conference on Pattern Recognition (ICPR), pp. 248\u2013253. IEEE","DOI":"10.1109\/ICPR.2018.8545049"},{"issue":"3","key":"2943_CR27","doi-asserted-by":"publisher","first-page":"2013","DOI":"10.1007\/s11042-019-08209-5","volume":"79","author":"S Wang","year":"2020","unstructured":"Wang S, Lan L, Zhang X, Dong G, Luo Z (2020) Object-aware semantics of attention for image captioning. Multimedia Tools and Applications 79(3):2013\u20132030","journal-title":"Multimedia Tools and Applications"},{"key":"2943_CR28","doi-asserted-by":"crossref","unstructured":"Xu M, Fu P, Liu B, Yin H, Li J (2021) A novel dynamic graph evolution network for salient object detection. Appl Intell, pp 1\u201318","DOI":"10.1007\/s10489-021-02479-z"},{"issue":"12","key":"2943_CR29","doi-asserted-by":"publisher","first-page":"5412","DOI":"10.1109\/TNNLS.2020.2967597","volume":"31","author":"X Xu","year":"2020","unstructured":"Xu X, Wang T, Yang Y, Zuo L, Shen F, Shen HT (2020) Cross-modal attention with semantic consistence for image\u2013text matching. IEEE transactions on neural networks and learning systems 31 (12):5412\u20135425","journal-title":"IEEE transactions on neural networks and learning systems"},{"key":"2943_CR30","doi-asserted-by":"publisher","first-page":"40","DOI":"10.1016\/j.neucom.2020.09.084","volume":"427","author":"S Yang","year":"2021","unstructured":"Yang S, Niu J, Wu J, Wang Y, Liu X, Li Q (2021) Automatic ultrasound image report generation with adaptive multimodal attention mechanism. Neurocomputing 427:40\u201349","journal-title":"Neurocomputing"},{"key":"2943_CR31","doi-asserted-by":"crossref","unstructured":"Yang X, Tang K, Zhang H, Cai J (2019) Auto-encoding scene graphs for image captioning. 
In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10,685\u201310,694","DOI":"10.1109\/CVPR.2019.01094"},{"key":"2943_CR32","doi-asserted-by":"crossref","unstructured":"Yang X, Zhang H, Cai J (2019) Learning to collocate neural modules for image captioning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4250\u20134260","DOI":"10.1109\/ICCV.2019.00435"},{"key":"2943_CR33","doi-asserted-by":"crossref","unstructured":"Yang X, Zhang H, Cai J (2020) Auto-encoding and distilling scene graphs for image captioning IEEE Transactions on Pattern Analysis and Machine Intelligence","DOI":"10.1109\/TPAMI.2020.3042192"},{"issue":"3","key":"2943_CR34","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3394955","volume":"16","author":"J Yuan","year":"2020","unstructured":"Yuan J, Zhang L, Guo S, Xiao Y, Li Z (2020) Image captioning with a joint attention mechanism by visual concept samples. ACM Transactions on Multimedia Computing, Communications, and Applications (TOMM) 16(3):1\u201322","journal-title":"ACM Transactions on Multimedia Computing, Communications, and Applications (TOMM)"},{"key":"2943_CR35","doi-asserted-by":"publisher","first-page":"40","DOI":"10.1016\/j.inffus.2020.08.022","volume":"66","author":"H Zhang","year":"2021","unstructured":"Zhang H, Le Z, Shao Z, Xu H, Ma J (2021) Mff-gan: an unsupervised generative adversarial network with adaptive and gradient joint constraints for multi-focus image fusion. Information Fusion 66:40\u201353","journal-title":"Information Fusion"},{"key":"2943_CR36","doi-asserted-by":"publisher","first-page":"43","DOI":"10.1016\/j.patrec.2020.12.020","volume":"143","author":"Y Zhang","year":"2021","unstructured":"Zhang Y, Shi X, Mi S, Yang X (2021) Image captioning with transformer and knowledge graph. 
Pattern Recogn Lett 143:43\u201349","journal-title":"Pattern Recogn Lett"},{"key":"2943_CR37","doi-asserted-by":"crossref","unstructured":"Zhang Z, Wu Q, Wang Y, Chen F (2021) Exploring region relationships implicitly: Image captioning with visual relationship attention. Image and Vision Computing p 104146","DOI":"10.1016\/j.imavis.2021.104146"},{"key":"2943_CR38","doi-asserted-by":"crossref","unstructured":"Zhong X, Nie G, Huang W, Liu W, Ma B, Lin CW (2021) Attention-guided image captioning with adaptive global and local feature fusion. Journal of Visual Communication and Image Representation p 103138","DOI":"10.1016\/j.jvcir.2021.103138"},{"key":"2943_CR39","doi-asserted-by":"crossref","unstructured":"Zhou Y, Wang M, Liu D, Hu Z, Zhang H (2020) More grounded image captioning by distilling image-text matching model. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4777\u20134786","DOI":"10.1109\/CVPR42600.2020.00483"},{"key":"2943_CR40","doi-asserted-by":"crossref","unstructured":"Zhu H, Wang R, Zhang X (2021) Image captioning with dense fusion connection and improved stacked attention module. 
Neural Process Lett, pp 1\u201318","DOI":"10.1007\/s11063-021-10431-y"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-021-02943-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-021-02943-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-021-02943-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,30]],"date-time":"2022-05-30T09:19:58Z","timestamp":1653902398000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-021-02943-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,11,13]]},"references-count":40,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2022,6]]}},"alternative-id":["2943"],"URL":"https:\/\/doi.org\/10.1007\/s10489-021-02943-w","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"type":"print","value":"0924-669X"},{"type":"electronic","value":"1573-7497"}],"subject":[],"published":{"date-parts":[[2021,11,13]]},"assertion":[{"value":"19 October 2021","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 November 2021","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"This paper strictly abides by the moral standards of this journal.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval"}},{"value":"All the authors of this paper have reviewed and agreed to contribute to your journal by 
consensus.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to Participate"}},{"value":"Once this paper is hired, we agree to publish it in your journal.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for Publication"}},{"value":"No conflict of interest exits in the submission of this manuscript, and manuscript is approved by all authors for publication.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of Interests"}}]}}