{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,22]],"date-time":"2024-10-22T18:27:07Z","timestamp":1729621627555,"version":"3.28.0"},"reference-count":17,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2015,6]]},"DOI":"10.1109\/icme.2015.7177427","type":"proceedings-article","created":{"date-parts":[[2015,8,12]],"date-time":"2015-08-12T22:44:22Z","timestamp":1439419462000},"page":"1-6","source":"Crossref","is-referenced-by-count":1,"title":["Predicting image caption by a unified hierarchical model"],"prefix":"10.1109","author":[{"family":"Lin Bai","sequence":"first","affiliation":[]},{"family":"Kan Li","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","first-page":"2466","article-title":"Human action recognition using a temporal hierarchy of covariance descriptors on 3d joint locations","author":"mohamed","year":"2013","journal-title":"Proceedings of the Twenty-Third International Joint Conference on Artificial Intelligence"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.406"},{"key":"ref12","doi-asserted-by":"crossref","first-page":"158","DOI":"10.1007\/978-3-642-33765-9_12","article-title":"Detecting actions, poses, and objects with relational phraselets","author":"desai","year":"2012","journal-title":"Computer Vision-ECCV 2012"},{"key":"ref13","doi-asserted-by":"crossref","first-page":"351","DOI":"10.1162\/tacl_a_00188","article-title":"Treetalk: Composition and compression of trees for image descriptions","volume":"2","author":"kuznetsova","year":"2014","journal-title":"Transactions of the Association for Computational Linguistics"},{"key":"ref14","doi-asserted-by":"crossref","first-page":"504","DOI":"10.1126\/science.1127647","article-title":"Reducing the dimensionality of data with neural networks","volume":"313","author":"hinton","year":"2006","journal-title":"Science"},{"key":"ref15","doi-asserted-by":"crossref","first-page":"196","DOI":"10.1007\/978-3-642-34500-5_24","article-title":"From image annotation to image description","author":"gupta","year":"2012","journal-title":"Neural Information Processing"},{"key":"ref16","doi-asserted-by":"crossref","first-page":"328","DOI":"10.1007\/s11263-010-0400-4","article-title":"Recovering occlusion boundaries from an image","volume":"91","author":"derek","year":"2011","journal-title":"International Journal of Computer Vision"},{"key":"ref17","first-page":"1292","article-title":"Image description using visual dependency representations","author":"elliott","year":"2013","journal-title":"EMNLP"},{"key":"ref4","article-title":"Going deeper with convolutions","author":"szegedy","year":"2014","journal-title":"arXiv preprint arXiv 1409 4842"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.0700622104"},{"key":"ref6","article-title":"Show and tell: A neural image caption generator","author":"vinyals","year":"2014","journal-title":"arXiv preprint arXiv 1411 4555"},{"key":"ref5","first-page":"1889","article-title":"Deep fragment embeddings for bidirectional image sentence mapping","author":"karpathy","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref8","first-page":"3144","article-title":"Recognising human-object interaction via exemplar based modelling","author":"jian-fang","year":"2013","journal-title":"Computer Vision (ICCV) 2013 IEEE International Conference on"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2010.5540234"},{"key":"ref2","doi-asserted-by":"crossref","first-page":"1691","DOI":"10.1109\/TPAMI.2012.67","article-title":"Recognizing human-object interactions in still images by modeling the mutual context of objects and human poses","volume":"34","author":"yao","year":"2012","journal-title":"Pattern Analysis and Machine Intelligence IEEE Transactions on"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2009.83"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995711"}],"event":{"name":"2015 IEEE International Conference on Multimedia and Expo (ICME)","start":{"date-parts":[[2015,6,29]]},"location":"Turin, Italy","end":{"date-parts":[[2015,7,3]]}},"container-title":["2015 IEEE International Conference on Multimedia and Expo (ICME)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7160935\/7177375\/07177427.pdf?arnumber=7177427","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,10,14]],"date-time":"2020-10-14T14:49:24Z","timestamp":1602686964000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/7177427"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,6]]},"references-count":17,"URL":"https:\/\/doi.org\/10.1109\/icme.2015.7177427","relation":{},"subject":[],"published":{"date-parts":[[2015,6]]}}}