{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T05:42:07Z","timestamp":1780465327231,"version":"3.54.1"},"reference-count":55,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2015,6]]},"DOI":"10.1109\/cvprw.2015.7301268","type":"proceedings-article","created":{"date-parts":[[2015,10,26]],"date-time":"2015-10-26T22:08:35Z","timestamp":1445897315000},"page":"18-26","source":"Crossref","is-referenced-by-count":125,"title":["Convolutional recurrent neural networks: Learning spatial dependencies for image representation"],"prefix":"10.1109","author":[{"given":"Zhen","family":"Zuo","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Bing","family":"Shuai","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Gang","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xiao","family":"Liu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xingxing","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Bing","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yushi","family":"Chen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/78.650093"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-013-0636-x"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.222"},{"key":"ref32","article-title":"3D object recognition, with deep belief nets","author":"nair","year":"2009","journal-title":"NIPS"},{"key":"ref31","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2010-343","article-title":"Recurrent neural network based language model","author":"mikolov","year":"2010","journal-title":"InterSpeech"},{"key":"ref30","author":"mikolov","year":"2012","journal-title":"Statistical Language Models Based on Neural Networks"},{"key":"ref37","article-title":"Discriminative recurrent sparse auto-encoders","author":"rolfe","year":"2013","journal-title":"arXiv preprint arXiv 1301 3775"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206537"},{"key":"ref35","article-title":"Recurrent convolutional neural networks for scene labeling","author":"pinheiro","year":"2014","journal-title":"ICML"},{"key":"ref34","article-title":"Deepid-net: multi-stage and deformable deep convolutional neural networks for object detection","author":"ouyang","year":"2014","journal-title":"arXiv preprint arXiv 1409 3505"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.476"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.115"},{"key":"ref29","doi-asserted-by":"crossref","DOI":"10.1109\/TIFS.2015.2408431","article-title":"Joint feature learning for face recognition","author":"lu","year":"2015","journal-title":"Information Forensics and Security IEEE Transactions on"},{"key":"ref2","article-title":"Modeling temporal dependencies in high-dimensional sequences: Application to polyphonic music generation and transcription","author":"boulanger-lewando ski","year":"2012","journal-title":"ICML"},{"key":"ref1","article-title":"Generalized denoising auto-encoders as generative models","author":"bengio","year":"2013","journal-title":"NIPS"},{"key":"ref20","article-title":"A clockwork rnn","author":"koutn\u00edk","year":"2014","journal-title":"ICML"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.186"},{"key":"ref21","article-title":"Imagenet classification, with deep convolutional neural networks","author":"krizhevsky","year":"2012","journal-title":"NIPS"},{"key":"ref24","article-title":"Deeply-supervised nets","author":"lee","year":"2014","journal-title":"arXiv preprint arXiv 1409 5185"},{"key":"ref23","article-title":"Hand, ritten digit recognition, with a back-propagation network","author":"le cun","year":"1990","journal-title":"NIPS"},{"key":"ref26","article-title":"Object bank: A high-level image representation for scene classification & semantic feature sparsification","author":"li","year":"2010","journal-title":"NIPS"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553453"},{"key":"ref50","article-title":"Modeling video dynamics, with deep dynencoder","volume":"2","author":"yan","year":"2014","journal-title":"ECCV"},{"key":"ref51","article-title":"Visualizing and understanding convolutional neural networks","author":"zeiler","year":"2013","journal-title":"arXiv preprint arXiv 1311 2901"},{"key":"ref55","article-title":"Learning discriminative and shareable features for scene classification","author":"zuo","year":"2014","journal-title":"ECCV"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2014.2298888"},{"key":"ref53","article-title":"Coarse-to-fine auto-encoder networks (cfan) for real-time face alignment","author":"zhang","year":"2014","journal-title":"ECCV"},{"key":"ref52","article-title":"Deep learning of scene-specific classifier for pedestrian detection","author":"zeng","year":"2014","journal-title":"ECCV"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.81"},{"key":"ref11","article-title":"Multi-scale orderless pooling of deep convolutional activation features","author":"gong","year":"2014","journal-title":"ECCV"},{"key":"ref40","article-title":"Overfeat: Integrated recognition, localization and detection using convolutional networks","author":"sermanet","year":"2013","journal-title":"arXiv preprint arXiv 1312 6229"},{"key":"ref12","article-title":"Towards end-to-end speech recognition, ith recurrent neural networks","author":"graves","year":"2014","journal-title":"ICML"},{"key":"ref13","first-page":"545","article-title":"Offline hand, riting recognition, ith multidimensional recurrent neural networks","author":"graves","year":"2009","journal-title":"Advances in neural information processing systems"},{"key":"ref14","article-title":"Spatial pyramid pooling in deep convolutional networks for visual recognition","author":"he","year":"2014","journal-title":"ECCV"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1162\/neco.2006.18.7.1527"},{"key":"ref16","doi-asserted-by":"crossref","first-page":"504","DOI":"10.1126\/science.1127647","article-title":"Reducing the dimensionality of data, with neural networks","volume":"313","author":"hinton","year":"2006","journal-title":"Science"},{"key":"ref17","author":"jaeger","year":"2002","journal-title":"Tutorial on Training Recurrent Neural Networks Covering BPPT RTRL EKF and the Echo State Network Approach"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654889"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.124"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6248110"},{"key":"ref3","article-title":"Return of the devil in the details: Delving deep into convolutional nets","author":"chatfield","year":"2014","journal-title":"BMVC"},{"key":"ref6","article-title":"Mid-level visual element discovery as discriminative mode seeking","author":"doersch","year":"2013","journal-title":"NIPS"},{"key":"ref5","article-title":"Imagenet: A large-scale hierarchical image database","author":"deng","year":"2009","journal-title":"CVPR"},{"key":"ref8","article-title":"Understanding deep architectures using a recursive convolutional network","author":"eigen","year":"2013","journal-title":"arXiv preprint arXiv 1312 1847"},{"key":"ref7","article-title":"Decaf: A deep convolutional activation feature for generic visual recognition","author":"donahue","year":"0","journal-title":"ICML 2014"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2010.5539970"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/0364-0213(90)90002-E"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.220"},{"key":"ref45","article-title":"Going deeper, with convolutions","author":"szegedy","year":"2014","journal-title":"arXiv preprint arXiv 1409 4842"},{"key":"ref48","article-title":"Max-margin multiple-instance dictionary learning","author":"wang","year":"2013","journal-title":"ICML"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2015.2403231"},{"key":"ref42","article-title":"Deep learning face representation from predicting 10, 000 classes","author":"sun","year":"2014","journal-title":"CVPR"},{"key":"ref41","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"2014","journal-title":"arXiv preprint arXiv 1409 1556"},{"key":"ref44","article-title":"The recurrent temporal restricted boltzmann machine","author":"sutskever","year":"2009","journal-title":"NIPS"},{"key":"ref43","author":"sutskever","year":"2013","journal-title":"Training Recurrent Neural Networks"}],"event":{"name":"2015 IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)","location":"Boston, MA, USA","start":{"date-parts":[[2015,6,7]]},"end":{"date-parts":[[2015,6,12]]}},"container-title":["2015 IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7293053\/7301265\/07301268.pdf?arnumber=7301268","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,24]],"date-time":"2022-05-24T10:31:02Z","timestamp":1653388262000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7301268\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,6]]},"references-count":55,"URL":"https:\/\/doi.org\/10.1109\/cvprw.2015.7301268","relation":{},"subject":[],"published":{"date-parts":[[2015,6]]}}}