{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T18:51:46Z","timestamp":1771699906837,"version":"3.50.1"},"reference-count":40,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2015,12]]},"DOI":"10.1109\/iccv.2015.277","type":"proceedings-article","created":{"date-parts":[[2016,2,19]],"date-time":"2016-02-19T23:23:49Z","timestamp":1455924229000},"page":"2407-2415","source":"Crossref","is-referenced-by-count":317,"title":["Guiding the Long-Short Term Memory Model for Image Caption Generation"],"prefix":"10.1109","author":[{"given":"Xu","family":"Jia","sequence":"first","affiliation":[]},{"given":"Efstratios","family":"Gavves","sequence":"additional","affiliation":[]},{"given":"Basura","family":"Fernando","sequence":"additional","affiliation":[]},{"given":"Tinne","family":"Tuytelaars","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","article-title":"Corpus-guided sentence generation of natural images","author":"yang","year":"2011","journal-title":"EMNLP"},{"key":"ref38","article-title":"Show, attend and tell: Neural image caption generation with visual attention","author":"xu","year":"2015","journal-title":"ICML"},{"key":"ref33","article-title":"Going deeper with convolutions","author":"szegedy","year":"2014","journal-title":"CVPR"},{"key":"ref32","article-title":"Sequence to sequence learning with neural networks","author":"sutskever","year":"2014","journal-title":"NIPS"},{"key":"ref31","article-title":"Generating text with recurrent neural networks","author":"sutskever","year":"2011","journal-title":"ICML"},{"key":"ref30","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"2015","journal-title":"ICLR"},{"key":"ref37","article-title":"A neural image caption 
generator","author":"vinyals","year":"2015","journal-title":"CVPR"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"ref35","article-title":"Matconvnet - convolutional neural networks for matlab","author":"vedaldi","year":"2014","journal-title":"CoRR abs\/1412 4564"},{"key":"ref34","article-title":"Lecture 6.5 - rmsprop","author":"tieleman","year":"2000","journal-title":"Technical Report MSU-CSE-00-2"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-013-0658-4"},{"key":"ref40","doi-asserted-by":"crossref","first-page":"67","DOI":"10.1162\/tacl_a_00166","article-title":"From image descriptions to visual denotations: New similarity metrics for semantic inference over event descriptions","volume":"2","author":"young","year":"2014","journal-title":"TACL"},{"key":"ref11","article-title":"Sequence transduction with recurrent neural networks","author":"graves","year":"2012","journal-title":"CoRR abs\/1211 3711"},{"key":"ref12","article-title":"Generating sequences with recurrent neural networks","author":"graves","year":"2013","journal-title":"CoRR abs\/1308 0850"},{"key":"ref13","author":"greff","year":"2015","journal-title":"LSTM A search space odyssey CoRR abs\/1503 04069"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref15","first-page":"853","article-title":"Framing image description as a ranking task: Data, models and evaluation metrics","volume":"47","author":"hodosh","year":"2013","journal-title":"JAIR"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1093\/biomet\/28.3-4.321"},{"key":"ref17","article-title":"Deep visual-semantic alignments for generating image descriptions","author":"karpathy","year":"2015","journal-title":"CVPR"},{"key":"ref18","article-title":"Deep fragment embeddings for bidirectional image sentence mapping","author":"karpathy","year":"2014","journal-title":"NIPS"},{"key":"ref19","article-title":"Multimodal neural language 
models","author":"kiros","year":"2014","journal-title":"ICML"},{"key":"ref28","article-title":"Generating image descriptions from computer vision detections","author":"mitchell","year":"2012","journal-title":"EACL"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/W14-4012"},{"key":"ref27","article-title":"Nonparametric method for data-driven image captioning","author":"mason","year":"2014","journal-title":"ACL"},{"key":"ref3","article-title":"Mind's eye: a recurrent visual representation for image caption generation","author":"chen","year":"2015","journal-title":"CVPR"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/W14-3348"},{"key":"ref29","article-title":"Bleu: a method for automatic evaluation of machine translation","author":"papineni","year":"2002","journal-title":"ACL"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1179"},{"key":"ref8","article-title":"From captions to visual concepts and back","author":"fang","year":"2015","journal-title":"CVPR"},{"key":"ref7","article-title":"Long-term recurrent convolutional networks for visual recognition and description","author":"donahue","year":"2015","journal-title":"CVPR"},{"key":"ref2","author":"bird","year":"2009","journal-title":"Natural Language Processing With Python"},{"key":"ref9","article-title":"Every picture tells a story: Generating sentences from images","author":"farhadi","year":"2010","journal-title":"ECCV"},{"key":"ref1","article-title":"Neural machine translation by jointly learning to align and translate","author":"bahdanau","year":"2015","journal-title":"ICLR"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2012.162"},{"key":"ref22","article-title":"Generalizing image captions for image-text parallel corpus","author":"kuznetsova","year":"2013","journal-title":"ACL"},{"key":"ref21","article-title":"Collective generation of natural image 
descriptions","author":"kuznetsova","year":"2012","journal-title":"ACL"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.3115\/1626355.1626389"},{"key":"ref23","doi-asserted-by":"crossref","first-page":"351","DOI":"10.1162\/tacl_a_00188","article-title":"Treetalk: Composition and compression of trees for image descriptions","volume":"2","author":"kuznetsova","year":"2014","journal-title":"TACL"},{"key":"ref26","article-title":"Deep captioning with multimodal recurrent neural networks (m-rnn)","author":"mao","year":"2015","journal-title":"ICLR"},{"key":"ref25","article-title":"Microsoft COCO: common objects in context","author":"lin","year":"2014","journal-title":"ECCV"}],"event":{"name":"2015 IEEE International Conference on Computer Vision (ICCV)","location":"Santiago, Chile","start":{"date-parts":[[2015,12,7]]},"end":{"date-parts":[[2015,12,13]]}},"container-title":["2015 IEEE International Conference on Computer Vision (ICCV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7407725\/7410356\/07410634.pdf?arnumber=7410634","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,9,15]],"date-time":"2020-09-15T21:06:39Z","timestamp":1600203999000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7410634\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,12]]},"references-count":40,"URL":"https:\/\/doi.org\/10.1109\/iccv.2015.277","relation":{},"subject":[],"published":{"date-parts":[[2015,12]]}}}