{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,14]],"date-time":"2025-11-14T07:34:38Z","timestamp":1763105678241},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"14","license":[{"start":{"date-parts":[[2019,11,9]],"date-time":"2019-11-09T00:00:00Z","timestamp":1573257600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2019,11,9]],"date-time":"2019-11-09T00:00:00Z","timestamp":1573257600000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2020,7]]},"DOI":"10.1007\/s00521-019-04587-x","type":"journal-article","created":{"date-parts":[[2019,11,9]],"date-time":"2019-11-09T08:03:07Z","timestamp":1573286587000},"page":"10471-10479","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Topic sensitive image descriptions"],"prefix":"10.1007","volume":"32","author":[{"given":"Usman","family":"Zia","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"M. Mohsin","family":"Riaz","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Abdul","family":"Ghafoor","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Syed Sohaib","family":"Ali","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2019,11,9]]},"reference":[{"issue":"5","key":"4587_CR1","doi-asserted-by":"publisher","first-page":"1072","DOI":"10.1109\/TPAMI.2017.2721945","volume":"40","author":"A Ramisa","year":"2018","unstructured":"Ramisa A, Yan F, Moreno-Noguer F, Mikolajczyk K (2018) Breakingnews: article annotation by image and text processing. IEEE Trans Pattern Anal Mach Intell 40(5):1072\u20131085","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"4587_CR2","unstructured":"Ling H, Fidler S (2017) Teaching machines to describe images via natural language feedback. In: NIPS"},{"key":"4587_CR3","doi-asserted-by":"crossref","unstructured":"Vinyals O, Toshev A, Bengio S, Erhan D (2015) Show and tell: a neural image caption generator. In: 2015 IEEE conference on computer vision and pattern recognition (CVPR), Boston MA, pp 3156\u20133164","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"4587_CR4","doi-asserted-by":"crossref","unstructured":"Fang H, Gupta S, Iandola F, Srivastava R, Deng L, Dollar P, Gao J, He X, Mitchell M, Platt J, Zitnick CL, Zweig G (2015) From captions to visual concepts and back. In: IEEE conference on computer vision and pattern recognition","DOI":"10.1109\/CVPR.2015.7298754"},{"key":"4587_CR5","doi-asserted-by":"crossref","unstructured":"Elliott D, de Vries AP (2015) Describing images using inferred visual dependency representations. In: Annual meeting of the association for computational linguistics","DOI":"10.3115\/v1\/P15-1005"},{"key":"4587_CR6","unstructured":"Tan YH, Chan CS (2016) phi-LSTM: a phrase-based hierarchical LSTM model for image captioning. In: ACCV"},{"key":"4587_CR7","doi-asserted-by":"crossref","unstructured":"You Q, Jin H, Wang Z, Fang C, Luo J (2016) Image captioning with semantic attention. In: 2016 IEEE conference on computer vision and pattern recognition (CVPR), pp 4651\u20134659","DOI":"10.1109\/CVPR.2016.503"},{"issue":"4","key":"4587_CR8","doi-asserted-by":"publisher","first-page":"664","DOI":"10.1109\/TPAMI.2016.2598339","volume":"39","author":"A Karpathy","year":"2017","unstructured":"Karpathy A, Fei-Fei L (2017) Deep visual-semantic alignments for generating image descriptions. IEEE Trans Pattern Anal Mach Intell 39(4):664\u2013676","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"4","key":"4587_CR9","doi-asserted-by":"publisher","first-page":"677","DOI":"10.1109\/TPAMI.2016.2599174","volume":"39","author":"J Donahue","year":"2017","unstructured":"Donahue J, Hendricks LA, Rohrbach M, Venugopalan S, Guadarrama S, Saenko K, Darrell T (2017) Long-term recurrent convolutional networks for visual recognition and description. IEEE Trans Pattern Anal Mach Intell 39(4):677\u2013691","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"4587_CR10","unstructured":"Mikolov T, Chen K, Corrado G, Dean J (2013) Efficient estimation of word representations in vector space. In: ICLR Workshop"},{"key":"4587_CR11","doi-asserted-by":"crossref","unstructured":"Pennington J, Socher R, Manning C (2014) Glove: global vectors for word representation. In: Proceedings of the 2014 conference on empirical methods in natural language processing (EMNLP), pp 1532-1543","DOI":"10.3115\/v1\/D14-1162"},{"issue":"1","key":"4587_CR12","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1214\/07-AOAS114","volume":"1","author":"D Blei","year":"2007","unstructured":"Blei D, Lafferty J (2007) A correlated topic model of science. Ann Appl Stat 1(1):17\u201335","journal-title":"Ann Appl Stat"},{"key":"4587_CR13","doi-asserted-by":"crossref","unstructured":"Chen B (2009) Latent topic modelling of word co-occurence information for spoken document retrieval. In: 2009 IEEE international conference on acoustics, speech and signal processing. Taipei, pp 3961\u20133964","DOI":"10.1109\/ICASSP.2009.4960495"},{"key":"4587_CR14","doi-asserted-by":"crossref","unstructured":"Socher R, Karpathy A, Le QV, Manning CD, Ng A (2014) Grounded compositional semantics for finding and describing images with sentences. In: Transactions of the association for computational linguistics, pp 207218","DOI":"10.1162\/tacl_a_00177"},{"key":"4587_CR15","doi-asserted-by":"crossref","unstructured":"Deng J, Dong W, Socher R, Li LJ, Li Kai, Fei-Fei Li (2009) ImageNet: a large-scale hierarchical image database. In: 2009 IEEE conference on computer vision and pattern recognition, pp 248\u2013255","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"4587_CR16","doi-asserted-by":"crossref","unstructured":"Donahue J, Hendricks LA, Guadarrama S, Rohrbach M, Venugopalan S, Saenko K, Darrell T (2015) Long-term recurrent convolutional networks for visual recognition and description. In: IEEE conference on computer vision and pattern recognition","DOI":"10.21236\/ADA623249"},{"key":"4587_CR17","doi-asserted-by":"crossref","unstructured":"Jia X, Gavves E, Fernando B, Tuytelaars T (2015) Guiding the long-short term memory model for image caption generation.\u00a0In: 2015 IEEE international conference on computer vision (ICCV), pp. 2407\u20132415","DOI":"10.1109\/ICCV.2015.277"},{"key":"4587_CR18","doi-asserted-by":"crossref","unstructured":"Chen X, Zitnick CL (2015) Minds eye: a recurrent visual representation for image caption generation. In: IEEE Conference on computer vision and pattern recognition","DOI":"10.1109\/CVPR.2015.7298856"},{"key":"4587_CR19","unstructured":"Xu K, Ba J, Kiros R, Cho K, Courville A, Salakhutdinov R, Zemel R, Bengio Y (2015) Show, attend and tell: neural image caption generation with visual attention.\u00a0In: International conference on machine learning, pp. 2048\u20132057"},{"key":"4587_CR20","doi-asserted-by":"crossref","unstructured":"Wu Q, Shen C, Liu L, Dick A, van den Hengel A (2016) What value do explicit high level concepts have in vision to language problems? In: CVPR","DOI":"10.1109\/CVPR.2016.29"},{"key":"4587_CR21","unstructured":"Jin J, Fu K, Cui R, Sha F, Zhang C (2015) Aligning where to see and what to tell: image caption with region-based attention and scene factorization. arXiv:1506.06272"},{"issue":"2","key":"4587_CR22","doi-asserted-by":"publisher","first-page":"154","DOI":"10.1007\/s11263-013-0620-5","volume":"104","author":"J Uijlings","year":"2013","unstructured":"Uijlings J, van de Sande K, Gevers T, Smeulders A (2013) Selective search for object recognition. IJCV 104(2):154\u2013171","journal-title":"IJCV"},{"key":"4587_CR23","unstructured":"Simonyan K, Zisserman A (2015) Very deep convolutional networks for large-scale image recognition. In: ICLR"},{"key":"4587_CR24","unstructured":"Yang Z, Yuan Y, Wu Y, Salakhutdinov R, Cohen W (2016) Encode, review, and decode: reviewer module for caption generation. In: NIPS"},{"key":"4587_CR25","doi-asserted-by":"crossref","unstructured":"Pedersoli M, Lucas T, Schmid C, Verbeek J (2017) Areas of attention for image captioning. In: CVPR","DOI":"10.1109\/ICCV.2017.140"},{"key":"4587_CR26","doi-asserted-by":"crossref","unstructured":"Girshick R, Donahue J, Darrell T, Malik J (2014) Rich feature hierarchies for accurate object detection and semantic segmentation. In: CVPR","DOI":"10.1109\/CVPR.2014.81"},{"issue":"3","key":"4587_CR27","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2014","unstructured":"Russakovsky O, Deng J, Su H, Krause J, Satheesh S, Ma S, Huang Z, Karpathy A, Khosla A, Bernstein M, Berg AC, Fei-Fei L (2014) Imagenet large scale visual recognition challenge. Int J Comput Vis 115(3):211\u2013252","journal-title":"Int J Comput Vis"},{"key":"4587_CR28","doi-asserted-by":"crossref","unstructured":"Garten J, Sagae K, Ustun V, Dehghani M (2015) Combining distributed vector representations for words. In: Proceedings of the 1st workshop on vector space modeling for natural language processing, pp 95\u2013101","DOI":"10.3115\/v1\/W15-1513"},{"key":"4587_CR29","doi-asserted-by":"crossref","unstructured":"Fadaee M, Bisazza A, Monz C (2017) Learning topic-sensitive word representations. In: Proceedings of the 55th annual meeting of the association for computational linguistics. Association for Computational Linguistics, pp 441\u2013447","DOI":"10.18653\/v1\/P17-2070"},{"key":"4587_CR30","doi-asserted-by":"crossref","unstructured":"Asuncion HU, Asuncion AU, Taylor RN (2010) Software traceability with topic modeling. In: 2010 ACM\/IEEE 32nd international conference on software engineering, pp 95\u2013104","DOI":"10.1145\/1806799.1806817"},{"key":"4587_CR31","doi-asserted-by":"crossref","unstructured":"Aldous DJ (1985) Exchangeability and related topics. In: \u00c9cole d\u2019\u00c9t\u00e9 de Probabilit\u00e9s de Saint-Flour XIII\u20141983, pp 1\u2013198","DOI":"10.1007\/BFb0099421"},{"issue":"5","key":"4587_CR32","doi-asserted-by":"publisher","first-page":"855","DOI":"10.1109\/TPAMI.2008.137","volume":"31","author":"A Graves","year":"2009","unstructured":"Graves A, Liwicki M, Fern\u00e1ndez S, Bertolami R, Bunke H, Schmidhuber J (2009) A novel connectionist system for unconstrained handwriting recognition. IEEE Trans Pattern Anal Mach Intell 31(5):855\u2013868","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"4587_CR33","doi-asserted-by":"crossref","unstructured":"Sak H, Senior A, Beaufays F (2014) Long short-term memory recurrent neural network architectures for large scale acoustic modeling. In: Fifteenth annual conference of the international speech communication association","DOI":"10.21437\/Interspeech.2014-80"},{"key":"4587_CR34","first-page":"853","volume":"47","author":"M Hodosh","year":"2013","unstructured":"Hodosh M, Young P, Hockenmaier J (2013) Framing image description as a ranking task: data, models and evaluation metrics. J Artif Intell 47:853\u2013899","journal-title":"J Artif Intell"},{"key":"4587_CR35","first-page":"67","volume":"2","author":"P Young","year":"2014","unstructured":"Young P, Lai A, Hodosh M, Hockenmaier J (2014) From image descriptions to visual denotations: new similarity metrics for semantic inference over event descriptions. Trans Assoc Comput Lin 2:67\u201378","journal-title":"Trans Assoc Comput Lin"},{"issue":"4","key":"4587_CR36","doi-asserted-by":"publisher","first-page":"652","DOI":"10.1109\/TPAMI.2016.2587640","volume":"39","author":"O Vinyals","year":"2017","unstructured":"Vinyals O, Toshev A, Bengio S, Erhan D (2017) Show and tell: lessons learned from the 2015 MSCOCO image captioning challenge. IEEE Trans Pattern Anal Mach Intell 39(4):652\u2013663","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"4587_CR37","first-page":"1097","volume":"2","author":"A Krizhevsky","year":"2012","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2012) Imagenet classification with deep convolutional neural networks. Adv Neural Inf Process Syst 2:1097\u20131105","journal-title":"Adv Neural Inf Process Syst"},{"key":"4587_CR38","doi-asserted-by":"crossref","unstructured":"Papineni K, Roukos S, Ward T, Zhu WJ (2002) BLEU: a method for automatic evaluation of machine translation. In: Proceedings of the 40th annual meeting on association for computational linguistics","DOI":"10.3115\/1073083.1073135"},{"key":"4587_CR39","unstructured":"Banerjee S, Lavie A (2005) METEOR: an automatic metric for MT evaluation with improved correlation with human judgments. In: Proceedings of the ACL workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization, pp 65\u201372"},{"key":"4587_CR40","doi-asserted-by":"crossref","unstructured":"Vedantam R, Zitnick CL, Parikh D (2015) CIDEr: consensus-based image description evaluation. In: 2015 IEEE conference on computer vision and pattern recognition (CVPR), pp 4566\u20134575","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"4587_CR41","unstructured":"Chen X, Fang H, Lin TY, Vedantam R, Gupta S, Doll\u00e1r P, Zitnick CL (2015) Microsoft COCO captions: data collection and evaluation server. arXiv preprint arXiv:1504.00325"},{"key":"4587_CR42","unstructured":"Mao J, Xu W, Yang Y, Wang J, Yuille AL (2015) Explain images with multimodal recurrent neural networks. In: ICLR"},{"key":"4587_CR43","doi-asserted-by":"crossref","unstructured":"Aneja J, Aditya D, Alexander SG (2018) Convolutional image captioning. In: Proceedings of the IEEE conference on computer vision and pattern recognition","DOI":"10.1109\/CVPR.2018.00583"},{"key":"4587_CR44","doi-asserted-by":"crossref","unstructured":"Wang Y, Lin Z, Shen X, Cohen S, Cottrell GW (2017) Skeleton key: image captioning by skeleton-attribute decomposition. In: Proceedings of the IEEE conference on computer vision and pattern recognition","DOI":"10.1109\/CVPR.2017.780"},{"key":"4587_CR45","unstructured":"Wang Q, Chan AB (2018) CNN+ CNN: convolutional decoders for image captioning. arXiv preprint arXiv:1805.09019"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-019-04587-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s00521-019-04587-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-019-04587-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,4]],"date-time":"2022-10-04T05:53:56Z","timestamp":1664862836000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s00521-019-04587-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,11,9]]},"references-count":45,"journal-issue":{"issue":"14","published-print":{"date-parts":[[2020,7]]}},"alternative-id":["4587"],"URL":"https:\/\/doi.org\/10.1007\/s00521-019-04587-x","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"value":"0941-0643","type":"print"},{"value":"1433-3058","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,11,9]]},"assertion":[{"value":"17 October 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 October 2019","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 November 2019","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}