{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,9]],"date-time":"2025-04-09T07:24:08Z","timestamp":1744183448954,"version":"3.37.3"},"reference-count":20,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2019,2,20]],"date-time":"2019-02-20T00:00:00Z","timestamp":1550620800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Process Lett"],"published-print":{"date-parts":[[2019,8]]},"DOI":"10.1007\/s11063-019-10005-z","type":"journal-article","created":{"date-parts":[[2019,2,20]],"date-time":"2019-02-20T01:43:44Z","timestamp":1550627024000},"page":"1005-1017","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Image Captioning Using Region-Based Attention Joint with Time-Varying Attention"],"prefix":"10.1007","volume":"50","author":[{"given":"Weixuan","family":"Wang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4884-323X","authenticated-orcid":false,"given":"Haifeng","family":"Hu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2019,2,20]]},"reference":[{"key":"10005_CR1","doi-asserted-by":"crossref","unstructured":"Chen L, Zhang H, Xiao J, Nie L, Shao J, Liu W, Chua TS (2017) Sca-cnn: spatial and channel-wise attention in convolutional networks for image captioning. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 6298\u20136306","DOI":"10.1109\/CVPR.2017.667"},{"key":"10005_CR2","unstructured":"Denkowski M, Lavie A (2011) Meteor 1.3: automatic metric for reliable optimization and evaluation of machine translation systems. In: Proceedings of the sixth workshop on statistical machine translation. Association for Computational Linguistics, pp 85\u201391"},{"issue":"12","key":"10005_CR3","doi-asserted-by":"publisher","first-page":"2321","DOI":"10.1109\/TPAMI.2016.2642953","volume":"39","author":"K Fu","year":"2017","unstructured":"Fu K, Jin J, Cui R, Sha F, Zhang C (2017) Aligning where to see and what to tell: image captioning with region-based attention and scene-specific contexts. IEEE Trans Pattern Anal Mach Intell 39(12):2321\u20132334","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"10005_CR4","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"10005_CR5","doi-asserted-by":"crossref","unstructured":"Karpathy A, Fei-Fei L (2015) Deep visual-semantic alignments for generating image descriptions. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 3128\u20133137","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"10005_CR6","unstructured":"Lin C (2004) Rouge: a package for automatic evaluation of summaries. Meeting of the association for computational linguistics, pp 74\u201381"},{"key":"10005_CR7","doi-asserted-by":"crossref","unstructured":"Lin TY, Maire M, Belongie S, Hays J, Perona P, Ramanan D, Doll\u00e1r P, Zitnick CL (2014) Microsoft coco: common objects in context. In: European conference on computer vision. Springer, pp 740\u2013755","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"10005_CR8","unstructured":"Mao J, Xu W, Yang Y, Wang J, Huang Z, Yuille AL (2015) Deep captioning with multimodal recurrent neural networks (m-rnn). In: Proceedings of international conference on learning representations"},{"key":"10005_CR9","unstructured":"Papineni K, Roukos S, Ward T, Zhu WJ (2002) Bleu: a method for automatic evaluation of machine translation. In: Proceedings of the 40th annual meeting on association for computational linguistics. Association for Computational Linguistics, pp 311\u2013318"},{"key":"10005_CR10","doi-asserted-by":"crossref","unstructured":"Pedersoli M, Lucas T, Schmid C, Verbeek JJ (2017) Areas of attention for image captioning. In: Proceedings of international conference on computer vision, pp 1251\u20131259","DOI":"10.1109\/ICCV.2017.140"},{"key":"10005_CR11","unstructured":"Ren S, He K, Girshick R, Sun J (2015) Faster r-cnn: towards real-time object detection with region proposal networks. In: Advances in neural information processing systems, pp 91\u201399"},{"issue":"7","key":"10005_CR12","doi-asserted-by":"publisher","first-page":"1476","DOI":"10.1109\/TPAMI.2016.2601099","volume":"39","author":"S Ren","year":"2017","unstructured":"Ren S, He K, Girshick R, Zhang X, Sun J (2017) Object detection networks on convolutional feature maps. IEEE Trans Pattern Anal Mach Intell 39(7):1476\u20131481","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"2","key":"10005_CR13","doi-asserted-by":"publisher","first-page":"154","DOI":"10.1007\/s11263-013-0620-5","volume":"104","author":"JR Uijlings","year":"2013","unstructured":"Uijlings JR, Van De Sande KE, Gevers T, Smeulders AW (2013) Selective search for object recognition. Int J Comput Vis 104(2):154\u2013171","journal-title":"Int J Comput Vis"},{"key":"10005_CR14","doi-asserted-by":"crossref","unstructured":"Vedantam R, Lawrence\u00a0Zitnick C, Parikh D (2015) Cider: consensus-based image description evaluation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4566\u20134575","DOI":"10.1109\/CVPR.2015.7299087"},{"issue":"15","key":"10005_CR15","doi-asserted-by":"publisher","first-page":"1041","DOI":"10.1049\/el.2017.0326","volume":"53","author":"W Wang","year":"2017","unstructured":"Wang W, Hu H (2017) Multimodal object description network for dense captioning. Electron Lett 53(15):1041\u20131042","journal-title":"Electron Lett"},{"key":"10005_CR16","unstructured":"Xu K, Ba J, Kiros R, Cho K, Courville A, Salakhudinov R, Zemel R, Bengio, Y.: Show, attend and tell: neural image caption generation with visual attention. In: International conference on machine learning, pp 2048\u20132057"},{"issue":"22","key":"10005_CR17","doi-asserted-by":"publisher","first-page":"1471","DOI":"10.1049\/el.2017.2351","volume":"53","author":"L Yang","year":"2017","unstructured":"Yang L, Hu H (2017) Tvprnn for image caption generation. Electron Lett 53(22):1471\u20131473","journal-title":"Electron Lett"},{"key":"10005_CR18","doi-asserted-by":"crossref","unstructured":"You Q, Jin H, Wang Z, Fang C, Luo J (2016) Image captioning with semantic attention. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4651\u20134659","DOI":"10.1109\/CVPR.2016.503"},{"key":"10005_CR19","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1162\/tacl_a_00166","volume":"2","author":"P Young","year":"2014","unstructured":"Young P, Lai A, Hodosh M, Hockenmaier J (2014) From image descriptions to visual denotations: New similarity metrics for semantic inference over event descriptions. Trans Assoc Comput Linguist 2:67\u201378","journal-title":"Trans Assoc Comput Linguist"},{"key":"10005_CR20","doi-asserted-by":"crossref","unstructured":"Zeng X, Ouyang W, Yang B, Yan J, Wang X (2016) Gated bi-directional cnn for object detection. In: European conference on computer vision. Springer, pp 354\u2013369","DOI":"10.1007\/978-3-319-46478-7_22"}],"container-title":["Neural Processing Letters"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11063-019-10005-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11063-019-10005-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11063-019-10005-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,2,19]],"date-time":"2020-02-19T19:22:05Z","timestamp":1582140125000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11063-019-10005-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,2,20]]},"references-count":20,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2019,8]]}},"alternative-id":["10005"],"URL":"https:\/\/doi.org\/10.1007\/s11063-019-10005-z","relation":{},"ISSN":["1370-4621","1573-773X"],"issn-type":[{"type":"print","value":"1370-4621"},{"type":"electronic","value":"1573-773X"}],"subject":[],"published":{"date-parts":[[2019,2,20]]},"assertion":[{"value":"20 February 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}