{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T11:54:08Z","timestamp":1761998048003,"version":"build-2065373602"},"reference-count":24,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,7]]},"DOI":"10.1109\/icme.2017.8019408","type":"proceedings-article","created":{"date-parts":[[2017,9,7]],"date-time":"2017-09-07T01:03:50Z","timestamp":1504746230000},"page":"361-366","source":"Crossref","is-referenced-by-count":15,"title":["Image captioning with deep LSTM based on sequential residual"],"prefix":"10.1109","author":[{"given":"Kaisheng","family":"Xu","sequence":"first","affiliation":[]},{"given":"Hanli","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Pengjie","family":"Tang","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","first-page":"4753","article-title":"Dis-turblabel: Regularizing CNN on the loss layer","author":"xie","year":"2016","journal-title":"Proceedings of CVPR'16"},{"key":"ref11","article-title":"Deep captioning with multimodal recurrent neural networks (m-RNN)","author":"mao","year":"2015","journal-title":"Proc ICLR'15"},{"key":"ref12","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"2014","journal-title":"Proc ICLR'14"},{"key":"ref13","first-page":"2048","article-title":"Show, attend and tell: Neural image caption generation with visual attention","author":"xu","year":"2015","journal-title":"Proc ICML'15"},{"key":"ref14","first-page":"311","article-title":"BLEU: a method for automatic evaluation of machine translation","author":"papineni","year":"2002","journal-title":"Proc ACL'02"},{"key":"ref15","first-page":"65","article-title":"METEOR: An automatic metric for MT evaluation with improved correlation with human judgments","volume":"29","author":"banerjee","year":"2005","journal-title":"Proc ACL Workshop IEEMMTS'05"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.29"},{"journal-title":"Highway networks","year":"2015","author":"srivastava","key":"ref18"},{"key":"ref19","first-page":"2377","article-title":"Training very deep networks","author":"srivastava","year":"2015","journal-title":"Proc NIPS 15"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"ref3","first-page":"595","article-title":"Multimodal neural language models","volume":"14","author":"kiros","year":"2014","journal-title":"Proc of ICML-14"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298878"},{"key":"ref2","doi-asserted-by":"crossref","first-page":"351","DOI":"10.1162\/tacl_a_00188","article-title":"Treetalk: Composition and compression of trees for image descriptions","volume":"2","author":"kuznetsova","year":"2014","journal-title":"Trans Assoc Comput Linguist"},{"key":"ref1","first-page":"15","article-title":"Every picture tells a story: Generating sentences from images","author":"farhadi","year":"2010","journal-title":"Proceedings of the ECCV'10"},{"key":"ref9","first-page":"1929","article-title":"Dropout: a simple wa $y$ to prevent neural networks from overfitting","volume":"15","author":"srivastava","year":"2014","journal-title":"J Machine Learning Research"},{"key":"ref20","first-page":"1058","article-title":"Regularization of neural networks using DropConnect","author":"wan","year":"2013","journal-title":"Proc ICM'13"},{"key":"ref22","first-page":"740","article-title":"Microsoft coco: Common objects in context","author":"lin","year":"2014","journal-title":"Proc ECCV'14"},{"key":"ref21","first-page":"28","article-title":"Swapout: Learning an ensemble of deep architectures","author":"singh","year":"2016","journal-title":"Proceedings NIPS 16"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.277"},{"key":"ref23","doi-asserted-by":"crossref","first-page":"853","DOI":"10.1613\/jair.3994","article-title":"Framing image description as a ranking task: Data, models and evaluation metrics","volume":"47","author":"hodosh","year":"2013","journal-title":"J Artificial Intelligence Research"}],"event":{"name":"2017 IEEE International Conference on Multimedia and Expo (ICME)","start":{"date-parts":[[2017,7,10]]},"location":"Hong Kong","end":{"date-parts":[[2017,7,14]]}},"container-title":["2017 IEEE International Conference on Multimedia and Expo (ICME)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8014303\/8019290\/08019408.pdf?arnumber=8019408","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,4,7]],"date-time":"2020-04-07T02:32:25Z","timestamp":1586226745000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8019408\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,7]]},"references-count":24,"URL":"https:\/\/doi.org\/10.1109\/icme.2017.8019408","relation":{},"subject":[],"published":{"date-parts":[[2017,7]]}}}