{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T20:43:33Z","timestamp":1761597813531,"version":"3.37.3"},"reference-count":54,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2018,4,1]],"date-time":"2018-04-01T00:00:00Z","timestamp":1522540800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"funder":[{"DOI":"10.13039\/501100003725","name":"National Research Foundation of Korea","doi-asserted-by":"publisher","award":["2015R1C1A1A02036562"],"award-info":[{"award-number":["2015R1C1A1A02036562"]}],"id":[{"id":"10.13039\/501100003725","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2018,4,1]]},"DOI":"10.1109\/tpami.2017.2700381","type":"journal-article","created":{"date-parts":[[2017,5,2]],"date-time":"2017-05-02T18:23:31Z","timestamp":1493749411000},"page":"945-957","source":"Crossref","is-referenced-by-count":21,"title":["Retrieval of Sentence Sequences for an Image Stream via Coherence Recurrent Convolutional Networks"],"prefix":"10.1109","volume":"40","author":[{"given":"Cesc Chunseong","family":"Park","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9543-7453","authenticated-orcid":false,"given":"Youngjin","family":"Kim","sequence":"additional","affiliation":[]},{"given":"Gunhee","family":"Kim","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.337"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.340"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.493"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.340"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/2578726.2578767"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33783-3_10"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.61"},{"key":"ref36","first-page":"1993","article-title":"Ranking and retrieval of image sequences from multiple paragraph queries","author":"kim","year":"2015","journal-title":"Proc IEEE Conf Comput Vis Pattern Recognit"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.455"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.303"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995711"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N16-1147"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995329"},{"key":"ref2","doi-asserted-by":"crossref","first-page":"853","DOI":"10.1613\/jair.3994","article-title":"Framing image description as a ranking task: Data, models and evaluation metrics","volume":"47","author":"hodosh","year":"2013","journal-title":"J Artif Intell Res"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298878"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10593-2_35"},{"article-title":"Statistical language models based on neural networks","year":"2012","author":"mikolov","key":"ref22"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.211"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298856"},{"key":"ref23","first-page":"2048","article-title":"Show, attend and tell: Neural image caption generation with visual attention","author":"xu","year":"2015","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2012.118"},{"key":"ref25","article-title":"Deep captioning with multimodal recurrent neural networks (m-RNN)","author":"mao","year":"2015","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/P14-5010"},{"key":"ref51","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"2015","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.123"},{"article-title":"Lecture 6.5&#x2013;RMSProp","year":"2012","author":"tieleman","key":"ref53"},{"key":"ref52","doi-asserted-by":"crossref","first-page":"339","DOI":"10.1016\/0893-6080(88)90007-X","article-title":"Generalization of backpropagation with application to a recurrent gas market model","volume":"1","author":"werbos","year":"1988","journal-title":"Neural Netw"},{"key":"ref10","first-page":"1097","article-title":"ImageNet classification with deep convolutional neural networks","author":"krizhevsky","year":"2012","journal-title":"Proc Neural Inf Process Syst"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2005.06.042"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24947-6_17"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1162\/coli.2008.34.1.1"},{"key":"ref13","first-page":"311","article-title":"BLEU: A method for automatic evaluation of machine translation","author":"papineni","year":"2002","journal-title":"Proc Annual Meeting of the Assoc Computational Linguistics"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"ref15","first-page":"65","article-title":"METEOR: An automatic metric for MT evaluation with improved correlation with human judgments","author":"lavie","year":"2005","journal-title":"Proc 32nd Ann Meeting Assoc for Computational Linguistics"},{"key":"ref16","first-page":"73","article-title":"Expressing an image stream with a sequence of natural sentences","author":"park","year":"2015","journal-title":"Proc Neural Inf Process Syst"},{"key":"ref17","first-page":"689","article-title":"Multimodal deep learning","author":"ngiam","year":"2011","journal-title":"Proc 28th Int Conf Mach Learn"},{"key":"ref18","first-page":"2222","article-title":"Multimodal learning with deep Boltzmann machines","author":"srivastava","year":"2012","journal-title":"Proc Neural Inf Process Syst"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-15561-1_2"},{"key":"ref4","first-page":"595","article-title":"Multimodal neural language models","author":"kiros","year":"2014","journal-title":"Proc 31st Int Conf Mach Learn"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"351","DOI":"10.1162\/tacl_a_00188","article-title":"TreeTalk: Composition and compression of trees for image descriptions","volume":"2","author":"kuznetsova","year":"2014","journal-title":"Transactions of the Association for Computational Linguistics"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995466"},{"key":"ref8","first-page":"1143","article-title":"Im2Text: Describing images using 1 million captioned photographs","author":"ordonez","year":"2011","journal-title":"Proc Neural Inf Process Syst"},{"key":"ref7","doi-asserted-by":"crossref","first-page":"207","DOI":"10.1162\/tacl_a_00177","article-title":"Grounded compositional semantics for finding and describing images with sentences","volume":"2","author":"socher","year":"2013","journal-title":"Transactions of the Association for Computational Linguistics"},{"key":"ref49","first-page":"1188","article-title":"Distributed representations of sentences and documents","author":"le","year":"2014","journal-title":"Proc 31st Int Conf Mach Learn"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298935"},{"journal-title":"Natural Language Processing With Python","year":"2009","author":"bird","key":"ref46"},{"key":"ref45","first-page":"2953","article-title":"Exploring models and data for image question answering","author":"ren","year":"2015","journal-title":"Proc 28th Int Conf Neural Inf Process Syst"},{"key":"ref48","first-page":"109","article-title":"Latent semantic analysis for text segmentation","author":"choi","year":"2001","journal-title":"Proc Conf Empirical Methods Natural Language Process"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298927"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.279"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.515"},{"key":"ref44","first-page":"1682","article-title":"A multi-world approach to question answering about real-world scenes based on uncertain input","author":"malinowski","year":"2014","journal-title":"Proc Neural Inf Process Syst"},{"key":"ref43","first-page":"2296","article-title":"Are you talking to a machine? Dataset and methods for multilingual image question answering","author":"gao","year":"2015","journal-title":"Proc 28th Int Conf Neural Inf Process Syst"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/8306529\/07917283.pdf?arnumber=7917283","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,12]],"date-time":"2022-01-12T16:22:52Z","timestamp":1642004572000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/7917283\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,4,1]]},"references-count":54,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2017.2700381","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"type":"print","value":"0162-8828"},{"type":"electronic","value":"2160-9292"},{"type":"electronic","value":"1939-3539"}],"subject":[],"published":{"date-parts":[[2018,4,1]]}}}