{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T07:27:45Z","timestamp":1740122865818,"version":"3.37.3"},"reference-count":85,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2020,11,16]],"date-time":"2020-11-16T00:00:00Z","timestamp":1605484800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,11,16]],"date-time":"2020-11-16T00:00:00Z","timestamp":1605484800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2021,3]]},"DOI":"10.1007\/s11042-020-09865-8","type":"journal-article","created":{"date-parts":[[2020,11,16]],"date-time":"2020-11-16T07:03:35Z","timestamp":1605510215000},"page":"9917-9959","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["CRUR: coupled-recurrent unit for unification, conceptualization and context capture for language representation - a generalization of bi directional LSTM"],"prefix":"10.1007","volume":"80","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1563-9304","authenticated-orcid":false,"given":"Chiranjib","family":"Sur","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,11,16]]},"reference":[{"key":"9865_CR1","doi-asserted-by":"crossref","unstructured":"Anderson P, He X, Buehler C, Teney D, Johnson M, Gould S, Zhang L (2018) Bottom-up and top-down attention for image captioning and visual question answering. In: CVPR, vol 3, p 6","DOI":"10.1109\/CVPR.2018.00636"},{"key":"9865_CR2","doi-asserted-by":"crossref","unstructured":"Chen H, Ding G, Lin Z, Zhao S, Han J (2018) Show, observe and tell: attribute-driven attention model for image captioning. In: IJCAI, pp 606\u2013612","DOI":"10.24963\/ijcai.2018\/84"},{"key":"9865_CR3","doi-asserted-by":"crossref","unstructured":"Chen M, Ding G, Zhao S, Chen H, Liu Q, Han J (2017) Reference based LSTM for image captioning. In: AAAI, pp 3981\u20133987","DOI":"10.1609\/aaai.v31i1.11198"},{"key":"9865_CR4","doi-asserted-by":"crossref","unstructured":"Chen F, Ji R, Su J, Wu Y, Wu Y (2017) Structcap: Structured semantic embedding for image captioning. In: Proceedings of the 2017 ACM on multimedia conference. ACM, pp 46\u201354","DOI":"10.1145\/3123266.3123275"},{"key":"9865_CR5","doi-asserted-by":"crossref","unstructured":"Chen F, Ji R, Sun X, Wu Y, Su J (2018) GroupCap: group-based image captioning with structured relevance and diversity constraints. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1345\u20131353","DOI":"10.1109\/CVPR.2018.00146"},{"key":"9865_CR6","unstructured":"Chen H, Zhang H, Chen PY, Yi J, Hsieh CJ (2017) Show-and-fool: Crafting adversarial examples for neural image captioning. arXiv:1712.02051"},{"key":"9865_CR7","doi-asserted-by":"crossref","unstructured":"Chen T, Zhang Z, You Q, Fang C, Wang Z, Jin H, Luo J (2018) Factual or emotional: stylized image captioning with adaptive learning and attention. arXiv:1807.03871","DOI":"10.1007\/978-3-030-01249-6_32"},{"key":"9865_CR8","doi-asserted-by":"crossref","unstructured":"Chen X, Zitnick CL (2015) A recurrent visual representation for image caption generation. In: Proceedings of the IEEE conference on computer vision and pattern recognition","DOI":"10.1109\/CVPR.2015.7298856"},{"key":"9865_CR9","unstructured":"Cohn-Gordon R, Goodman N, Potts C (2018) Pragmatically informative image captioning with character-level reference. arXiv:1804.05417"},{"key":"9865_CR10","doi-asserted-by":"crossref","unstructured":"Cornia M, Baraldi L, Serra G, Cucchiara R (2018) Paying more attention to saliency: image captioning with saliency and context attention, vol 14, p 48","DOI":"10.1145\/3177745"},{"key":"9865_CR11","unstructured":"Devlin J, Gupta S, Girshick R, Mitchell M, Zitnick CL (2015) Exploring nearest neighbor approaches for image captioning. arXiv:1505.04467"},{"key":"9865_CR12","doi-asserted-by":"crossref","unstructured":"Devlin J et al (2015) Language models for image captioning: the quirks and what works. arXiv:1505.01809","DOI":"10.3115\/v1\/P15-2017"},{"key":"9865_CR13","doi-asserted-by":"crossref","unstructured":"Donahue J et al (2015) Long-term recurrent convolutional networks for visual recognition and description. In: Proceedings of the IEEE conference on computer vision and pattern recognition","DOI":"10.1109\/CVPR.2015.7298878"},{"key":"9865_CR14","doi-asserted-by":"crossref","unstructured":"Fang H et al (2015) From captions to visual concepts and back. In: Proceedings of the IEEE conference on computer vision and pattern recognition","DOI":"10.1109\/CVPR.2015.7298754"},{"issue":"12","key":"9865_CR15","doi-asserted-by":"publisher","first-page":"2321","DOI":"10.1109\/TPAMI.2016.2642953","volume":"39","author":"K Fu","year":"2017","unstructured":"Fu K, Jin J, Cui R, Sha F, Zhang C (2017) Aligning where to see and what to tell: Image captioning with region-based attention and scene-specific contexts. IEEE Trans Pattern Anal Mach Intell 39(12):2321\u20132334","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"9865_CR16","first-page":"1","volume":"99","author":"K Fu","year":"2018","unstructured":"Fu K, Li J, Jin J, Zhang C (2018) Image-text surgery: efficient concept learning in image captioning by generating pseudopairs. IEEE Trans Neural Netw Learn Syst 99:1\u201312","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"9865_CR17","unstructured":"Gan Z et al (2016) Semantic compositional networks for visual captioning arXiv:1611.08002"},{"key":"9865_CR18","doi-asserted-by":"crossref","unstructured":"Gan C, et al. (2017) Stylenet: generating attractive visual captions with styles. In: CVPR","DOI":"10.1109\/CVPR.2017.108"},{"key":"9865_CR19","doi-asserted-by":"crossref","unstructured":"Harzig P, Brehm S, Lienhart R, Kaiser C, Schallner R (2018) Multimodal image captioning for marketing analysis. arXiv:1802.01958","DOI":"10.1109\/MIPR.2018.00035"},{"key":"9865_CR20","doi-asserted-by":"crossref","unstructured":"Jia X et al (2015) Guiding the long-short term memory model for image caption generation. In: Proceedings of the IEEE international conference on computer vision","DOI":"10.1109\/ICCV.2015.277"},{"key":"9865_CR21","doi-asserted-by":"crossref","unstructured":"Jiang W, Ma L, Chen X, Zhang H, Liu W (2018) Learning to guide decoding for image captioning. arXiv:1804.00887","DOI":"10.1609\/aaai.v32i1.12283"},{"key":"9865_CR22","unstructured":"Jin J et al (2015) Aligning where to see and what to tell: image caption with region-based attention and scene factorization. arXiv:1506.06272"},{"key":"9865_CR23","doi-asserted-by":"crossref","unstructured":"Karpathy A, Li Fei-Fei (2015) Deep visual-semantic alignments for generating image descriptions. In: Proceedings of the IEEE Conference on computer vision and pattern recognition","DOI":"10.1109\/CVPR.2015.7298932"},{"issue":"6","key":"9865_CR24","doi-asserted-by":"publisher","first-page":"398","DOI":"10.1049\/iet-cvi.2016.0286","volume":"11","author":"M Kilickaya","year":"2017","unstructured":"Kilickaya M, Akkus BK, Cakici R, Erdem A, Erdem E, Ikizler-Cinbis N (2017) Data-driven image captioning via salient region discovery. IET Comput Vis 11(6):398\u2013406","journal-title":"IET Comput Vis"},{"key":"9865_CR25","unstructured":"Kiros R, Salakhutdinov R, Zemel R (2014) Multimodal neural language models. In: International conference on machine learning, pp 595\u2013603"},{"key":"9865_CR26","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1007\/s11263-016-0981-7","volume":"123.1","author":"R Krishna","year":"2017","unstructured":"Krishna R et al (2017) Visual genome: Connecting language and vision using crowdsourced dense image annotations. Int J Comput Vis 123.1:32\u201373","journal-title":"Int J Comput Vis"},{"key":"9865_CR27","unstructured":"Li X, Wang X, Xu C, Lan W, Wei Q, Yang G, Xu J (2018) COCO-CN for cross-lingual image tagging, captioning and retrieval. arXiv:1805.08661"},{"key":"9865_CR28","doi-asserted-by":"crossref","unstructured":"Liu X, Li H, Shao J, Chen D, Wang X (2018) Show, tell and discriminate: image captioning by self-retrieval with partially labeled data. arXiv:1803.08314","DOI":"10.1007\/978-3-030-01267-0_21"},{"key":"9865_CR29","doi-asserted-by":"crossref","unstructured":"Liu C, Mao J, Sha F, Yuille AL (2017) Attention correctness in neural image captioning. In: AAAI, pp 4176\u20134182","DOI":"10.1609\/aaai.v31i1.11197"},{"key":"9865_CR30","doi-asserted-by":"crossref","unstructured":"Liu C, Sun F, Wang C, Wang F, Yuille A (2017) MAT: A multimodal attentive translator for image captioning. arXiv:1702.05658","DOI":"10.24963\/ijcai.2017\/563"},{"key":"9865_CR31","doi-asserted-by":"crossref","unstructured":"Liu S, Zhu Z, Ye N, Guadarrama S, Murphy K (2017) Improved image captioning via policy gradient optimization of spider. In: Proceedings of the IEEE international conference on computer vision , vol 3, p 3","DOI":"10.1109\/ICCV.2017.100"},{"key":"9865_CR32","doi-asserted-by":"publisher","first-page":"102178","DOI":"10.1016\/j.ipm.2019.102178","volume":"57.2","author":"M Liu","year":"2020","unstructured":"Liu M et al (2020) Image caption generation with dual attention mechanism. Inf Process Manag 57.2:102178","journal-title":"Inf Process Manag"},{"key":"9865_CR33","doi-asserted-by":"crossref","unstructured":"Lu D, Whitehead S, Huang L, Ji H, Chang SF (2018) Entity-aware image caption generation. arXiv:1804.07889","DOI":"10.18653\/v1\/D18-1435"},{"key":"9865_CR34","doi-asserted-by":"crossref","unstructured":"Lu J, Xiong C, Parikh D, Socher R (2017) Knowing when to look: adaptive attention via a visual sentinel for image captioning. In: Proceedings of the IEEE conference on Computer Vision and Pattern Recognition (CVPR), vol 6, p 2","DOI":"10.1109\/CVPR.2017.345"},{"key":"9865_CR35","doi-asserted-by":"crossref","unstructured":"Lu J, Yang J, Batra D, Parikh D (2018) Neural baby talk. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 7219\u20137228","DOI":"10.1109\/CVPR.2018.00754"},{"key":"9865_CR36","unstructured":"Mao J et al (2014) Deep captioning with multimodal recurrent neural networks (m-rnn). arXiv:1412.6632"},{"key":"9865_CR37","unstructured":"Melnyk I, Sercu T, Dognin PL, Ross J, Mroueh Y (2018) Improved image captioning with adversarial semantic alignment. arXiv:1805.00063"},{"key":"9865_CR38","unstructured":"Palangi H, Smolensky P, He X, Deng L, Redmond WA (2017) Deep learning of grammatically-interpretable representations through question-answering. arXiv:1705.08432"},{"key":"9865_CR39","doi-asserted-by":"crossref","unstructured":"Park CC, Kim B, Kim G (2017) Attend to you: Personalized image captioning with context sequence memory networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 895\u2013903","DOI":"10.1109\/CVPR.2017.681"},{"key":"9865_CR40","doi-asserted-by":"crossref","unstructured":"Park CC, Kim B, Kim G (2018) Towards personalized image captioning via multimodal memory networks. IEEE Trans Pattern Anal Mach Intell","DOI":"10.1109\/TPAMI.2018.2824816"},{"key":"9865_CR41","doi-asserted-by":"publisher","first-page":"38","DOI":"10.1016\/j.imavis.2019.03.003","volume":"86","author":"Y Peng","year":"2019","unstructured":"Peng Y et al (2019) Image caption model of double LSTM with scene factors. Image Vis Comput 86:38\u201344","journal-title":"Image Vis Comput"},{"key":"9865_CR42","doi-asserted-by":"crossref","unstructured":"Ren Z, Wang X, Zhang N, Lv X, Li LJ (2017) Deep reinforcement learning-based image captioning with embedding reward. arXiv:1704.03899","DOI":"10.1109\/CVPR.2017.128"},{"key":"9865_CR43","doi-asserted-by":"crossref","unstructured":"Rennie SJ, Marcheret E, Mroueh Y, Ross J, Goel V (2017) Self-critical sequence training for image captioning. In: CVPR, vol 1, p 3","DOI":"10.1109\/CVPR.2017.131"},{"key":"9865_CR44","doi-asserted-by":"crossref","unstructured":"Sharma P, Ding N, Goodman S, Soricut R (2018) Conceptual captions: a cleaned, hypernymed, image alt-text dataset for automatic image captioning. In: Proceedings of the 56th annual meeting of the association for computational linguistics (volume 1: long papers), vol 1, pp 2556\u20132565","DOI":"10.18653\/v1\/P18-1238"},{"key":"9865_CR45","doi-asserted-by":"crossref","unstructured":"Sharma G et al (2019) Visual Image Caption Generator Using Deep Learning Available at SSRN 3368837","DOI":"10.2139\/ssrn.3368837"},{"key":"9865_CR46","unstructured":"Sur C (2018) Feature fusion effects of tensor product representation on (de) compositional network for caption generation for images. arXiv:1812.06624"},{"key":"9865_CR47","unstructured":"Sur C (2018) Feature fusion effects of tensor product representation on (de) compositional network for caption generation for images. arXiv:1812.06624"},{"key":"9865_CR48","unstructured":"Sur C (2018) Representation for language understanding, University of Florida, Gainesville. Available at: https:\/\/drive.google.com\/file\/d\/15Fhmt5aM_b0J5jtE9mdWInQPfDS3TqVw"},{"key":"9865_CR49","doi-asserted-by":"crossref","unstructured":"Sur C (2019) Survey of deep learning and architectures for visual captioning\u2014transitioning between media and natural languages. Multimed Tools Appl 1\u201351","DOI":"10.1007\/s11042-019-08021-1"},{"key":"9865_CR50","unstructured":"Sur C (2019) TPsgtR neural-symbolic tensor product scene-graph-triplet representation for image captioning. arXiv:1911.10115"},{"key":"9865_CR51","unstructured":"Sur Chiranjib. (2019) Tpsgtr Neural-symbolic tensor product scene-graph-triplet representation for image captioning arXiv:1911.10115"},{"issue":"4","key":"9865_CR52","doi-asserted-by":"publisher","first-page":"689","DOI":"10.1007\/s12065-019-00278-7","volume":"12","author":"C Sur","year":"2019","unstructured":"Sur C (2019) UCRLF: unified constrained reinforcement learning framework for phase-aware architectures for autonomous vehicle signaling and trajectory optimization. Evol Intell 12(4):689\u2013712","journal-title":"Evol Intell"},{"key":"9865_CR53","unstructured":"Sur C (2020) aiTPR attribute interaction-tensor product representation for image caption. arXiv:2001.09545"},{"key":"9865_CR54","unstructured":"Sur C (2020) Gaussian Smoothen Semantic Features (GSSF)\u2013exploring the linguistic aspects of visual captioning in Indian languages (Bengali) using mscoco framework. arXiv:2002.06701"},{"key":"9865_CR55","unstructured":"Sur C (2020) SACT self-aware multi-space feature composition transformer for multinomial attention for video captioning. arXiv:2006.14262"},{"key":"9865_CR56","unstructured":"Sur C (2020) MRRC multiple role representation crossover interpretation for image captioning with R-CNN Feature Distribution Composition (FDC). arXiv:2002.06436"},{"key":"9865_CR57","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1007\/s42979-020-00238-4","volume":"1","author":"C Sur","year":"2020","unstructured":"Sur C (2020) AACR feature fusion effects of algebraic amalgamation composed representation on (de)compositional network for caption generation for images. SN Comput Sci 1:229. https:\/\/doi.org\/10.1007\/s42979-020-00238-4","journal-title":"SN Comput Sci"},{"key":"9865_CR58","unstructured":"Sur C (2020) Gaussian Smoothen Semantic Features (GSSF)\u2013exploring the linguistic aspects of visual captioning in Indian languages (Bengali) using MSCOCO framework. arXiv:2002.06701"},{"key":"9865_CR59","unstructured":"Sur C (2020) MRRC: multiple role representation crossover interpretation for image captioning with R-CNN Feature Distribution Composition (FDC) arXiv:2002.06436"},{"key":"9865_CR60","unstructured":"Sur C (2020) aiTPR attribute interaction-tensor product representation for image caption. arXiv:2001.09545"},{"issue":"1","key":"9865_CR61","doi-asserted-by":"publisher","first-page":"22","DOI":"10.1007\/s42452-019-1765-9","volume":"2","author":"C Sur","year":"2020","unstructured":"Sur C (2020) RBN: enhancement in language attribute prediction using global representation of natural language transfer learning technology like Google BERT. SN Appl Sci 2(1):22","journal-title":"SN Appl Sci"},{"key":"9865_CR62","unstructured":"Sur C (2020) Self-Segregating and Coordinated-Segregating Transformer for Focused Deep Multi-Modular Network for Visual Question Answering. arXiv preprint arXiv:2006.14264"},{"key":"9865_CR63","unstructured":"Sur C (2020) ReLGAN: Generalization of Consistency for GAN with Disjoint Constraints and Relative Learning of Generative Processes for Multiple Transformation Learning. arXiv preprint arXiv:2006.07809"},{"key":"9865_CR64","doi-asserted-by":"publisher","first-page":"228","DOI":"10.1007\/s42979-020-00234-8","volume":"1","author":"C Sur","year":"2020","unstructured":"Sur C (2020) GenAtSeq GAN with Heuristic Reforms for Knowledge Centric Network with Browsing Characteristics Learning, Individual Tracking and Malware Detection with Website2Vec. Sn Comput Sci 1:228. https:\/\/doi.org\/10.1007\/s42979-020-00234-8","journal-title":"Sn Comput Sci"},{"key":"9865_CR65","doi-asserted-by":"crossref","unstructured":"Sur C, Liu P, Zhou Y, Dapeng W u (2019) Semantic tensor product for image captioning. In: 2019 5th international conference on Big Data Computing and Communications (BIGCOM). IEEE, pp 33\u201337","DOI":"10.1109\/BIGCOM.2019.00013"},{"key":"9865_CR66","doi-asserted-by":"publisher","first-page":"86","DOI":"10.1016\/j.neucom.2018.12.026","volume":"333","author":"YH Tan","year":"2019","unstructured":"Tan YH, Chan CS (2019) Phrase-based image caption generator with hierarchical LSTM network. Neurocomputing 333:86\u2013100","journal-title":"Neurocomputing"},{"key":"9865_CR67","doi-asserted-by":"crossref","unstructured":"Tavakoliy HR, Shetty R, Borji A, Laaksonen J (2017) Paying attention to descriptions generated by image captioning models. In: Computer Vision (ICCV), 2017 IEEE International Conference on. IEEE, pp 2506\u20132515","DOI":"10.1109\/ICCV.2017.272"},{"issue":"4","key":"9865_CR68","doi-asserted-by":"publisher","first-page":"652","DOI":"10.1109\/TPAMI.2016.2587640","volume":"39","author":"O Vinyals","year":"2017","unstructured":"Vinyals O, Toshev A, Bengio S, Erhan D (2017) Show and tell: lessons learned from the 2015 mscoco image captioning challenge. IEEE Trans Pattern Anal Mach Intell 39(4):652\u2013663","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"9865_CR69","doi-asserted-by":"crossref","unstructured":"Vinyals O et al (2015) Show and tell: A neural image caption generator. In: Proceedings of the IEEE conference on computer vision and pattern recognition","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"9865_CR70","doi-asserted-by":"crossref","unstructured":"Wang Y, Lin Z, Shen X, Cohen S, Cottrell GW (2017) Skeleton key: Image captioning by skeleton-attribute decomposition. arXiv:1704.06972","DOI":"10.1109\/CVPR.2017.780"},{"key":"9865_CR71","first-page":"40","volume":"14.2s","author":"C Wang","year":"2018","unstructured":"Wang C, Yang H, Meinel C (2018) Image captioning with deep bidirectional lstms and multi-task learning. ACM Trans Multimed Comput Commun Appl (TOMM) 14.2s:40","journal-title":"ACM Trans Multimed Comput Commun Appl (TOMM)"},{"key":"9865_CR72","unstructured":"Wu J, Hu Z, Mooney RJ (2018) Joint image captioning and question answering. arXiv:1805.08389"},{"key":"9865_CR73","doi-asserted-by":"crossref","unstructured":"Wu T, Ku T, Zhang H (2020) Research for image caption based on global attention mechanism. In: Second target recognition and artificial intelligence summit forum. International society for optics and photonics, vol 11427","DOI":"10.1117\/12.2552711"},{"key":"9865_CR74","doi-asserted-by":"crossref","unstructured":"Wu Q, Shen C, Wang P, Dick A, van den Hengel A (2017), Image captioning and visual question answering based on attributes and external knowledge. IEEE Trans Pattern Anal Mach Intell","DOI":"10.1109\/TPAMI.2017.2708709"},{"key":"9865_CR75","doi-asserted-by":"crossref","unstructured":"Wu C, Wei Y, Chu X, Su F, Wang L (2018) Modeling visual and word-conditional semantic attention for image captioning. Signal Process Image Commun","DOI":"10.1016\/j.image.2018.06.002"},{"key":"9865_CR76","unstructured":"Xu K et al (2015) Show, attend and tell: neural image caption generation with visual attention. In: International conference on machine learning"},{"key":"9865_CR77","doi-asserted-by":"crossref","unstructured":"Yao T, Pan Y, Li Y, Mei T (2017) Incorporating copying mechanism in image captioning for learning novel objects. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR). IEEE, pp 5263\u20135271","DOI":"10.1109\/CVPR.2017.559"},{"key":"9865_CR78","doi-asserted-by":"crossref","unstructured":"Yao T, Pan Y, Li Y, Qiu Z, Mei T (2017) Boosting image captioning with attributes. In: IEEE international conference on computer vision, ICCV, pp 22\u201329","DOI":"10.1109\/ICCV.2017.524"},{"key":"9865_CR79","doi-asserted-by":"crossref","unstructured":"Ye S, Liu N, Han J (2018) Attentive linear transformation for image captioning. IEEE Trans Image Process","DOI":"10.1109\/TIP.2018.2855406"},{"key":"9865_CR80","unstructured":"You Q, Jin H, Luo J (2018) Image captioning at will: a versatile scheme for effectively injecting sentiments into image descriptions. arXiv:1801.10121"},{"key":"9865_CR81","doi-asserted-by":"crossref","unstructured":"You Q et al (2016) Image captioning with semantic attention. In: Proceedings of the IEEE conference on computer vision and pattern recognition","DOI":"10.1109\/CVPR.2016.503"},{"key":"9865_CR82","unstructured":"Zhang L, Sung F, Liu F, Xiang T, Gong S, Yang Y, Hospedales TM (2017) Actor-critic sequence training for image captioning. arXiv:1706.09601"},{"key":"9865_CR83","doi-asserted-by":"crossref","unstructured":"Zhang M, Yang Y, Zhang H, Ji Y, Shen HT, Chua TS (2018) More is Better: precise and detailed image captioning using online positive recall and missing concepts mining. IEEE Trans Image Process","DOI":"10.1109\/TIP.2018.2855415"},{"key":"9865_CR84","doi-asserted-by":"crossref","unstructured":"Zhao W, Wang B, Ye J, Yang M, Zhao Z, Luo R, Qiao Y (2018) A multi-task learning approach for image captioning. In: IJCAI, pp 1205\u20131211","DOI":"10.24963\/ijcai.2018\/168"},{"key":"9865_CR85","unstructured":"Zheng J et al (2019) Image captioning with integrated bottom-up and multi-level residual top-down attention for game scene understanding. arXiv:1906.06632"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-020-09865-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11042-020-09865-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-020-09865-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,28]],"date-time":"2022-11-28T04:35:16Z","timestamp":1669610116000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11042-020-09865-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,11,16]]},"references-count":85,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2021,3]]}},"alternative-id":["9865"],"URL":"https:\/\/doi.org\/10.1007\/s11042-020-09865-8","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"type":"print","value":"1380-7501"},{"type":"electronic","value":"1573-7721"}],"subject":[],"published":{"date-parts":[[2020,11,16]]},"assertion":[{"value":"21 July 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 September 2020","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 September 2020","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 November 2020","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}