{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,5]],"date-time":"2025-12-05T12:13:32Z","timestamp":1764936812552,"version":"3.28.0"},"reference-count":41,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2015,6]]},"DOI":"10.1109\/cvpr.2015.7298768","type":"proceedings-article","created":{"date-parts":[[2015,10,15]],"date-time":"2015-10-15T22:42:06Z","timestamp":1444948926000},"page":"1600-1609","source":"Crossref","is-referenced-by-count":33,"title":["Recognize complex events from static images by fusing deep channels"],"prefix":"10.1109","author":[{"family":"Yuanjun Xiong","sequence":"first","affiliation":[]},{"family":"Kai Zhu","sequence":"additional","affiliation":[]},{"family":"Dahua Lin","sequence":"additional","affiliation":[]},{"given":"Xiaoou","family":"Tang","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"crossref","first-page":"722","DOI":"10.1007\/978-3-642-33712-3_52","article-title":"Complex events detection using data-driven concepts","volume":"7574","author":"yang","year":"2012","journal-title":"Computer Vision ECCV 2012"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2010.5539879"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-013-0620-5"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6247808"},{"key":"ref31","article-title":"Deep learning face representation by joint identification-verification","author":"sun","year":"2014","journal-title":"CxgoRR abs\/1406 4773"},{"key":"ref30","first-page":"2222","article-title":"Multimodal learning with deep boltzmann machines","author":"srivatsa","year":"2012","journal-title":"Advances in Neural Information Processing Systems 
25"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.1992.223161"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2010.5539970"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.341"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.5244\/C.23.124"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2008.4761663"},{"key":"ref40","doi-asserted-by":"crossref","first-page":"1691","DOI":"10.1109\/TPAMI.2012.67","article-title":"Recognizing human-object interactions in still images by modeling the mutual context of objects and human poses","volume":"34","author":"yao","year":"2012","journal-title":"Pattern Analysis and Machine Intelligence IEEE Transactions on"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654889"},{"key":"ref12","first-page":"1097","article-title":"Imagenet classification with deep convolutional neural networks","author":"krizhevsky","year":"2012","journal-title":"Advances in Neural Information Processing Systems 25"},{"key":"ref13","doi-asserted-by":"crossref","first-page":"2169","DOI":"10.1109\/CVPR.2006.68","article-title":"Beyond bags of features: Spatial pyramid matching for recognizing natural scene categories","volume":"2","author":"lazebnik","year":"2006","journal-title":"Computer Vision and Pattern Recognition 2006 IEEE Computer Society Conference on"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.445"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2007.4408872"},{"key":"ref16","doi-asserted-by":"crossref","first-page":"2036","DOI":"10.1109\/CVPR.2009.5206718","article-title":"Towards total scene understanding: Classification, annotation and segmentations in an automatic framework","author":"li","year":"2009","journal-title":"Computer Vision and Pattern Recognition 2009 CVPR 2009 IEEE Conference on"},{"key":"ref17","first-page":"1378","article-title":"Object bank: A high-level image 
representation for scene classification & semantic feature sparsification","author":"li","year":"2010","journal-title":"Advances in neural information processing systems"},{"key":"ref18","first-page":"1120","article-title":"Exploiting multi-modal interactions: A unified framework","author":"li","year":"2009","journal-title":"IJCAI"},{"key":"ref19","doi-asserted-by":"crossref","first-page":"437","DOI":"10.1007\/978-3-319-10602-1_29","article-title":"Exploiting privileged information from web data for image categorization","volume":"8693","author":"li","year":"2014","journal-title":"Computer Vision - ECCV 2014"},{"key":"ref28","article-title":"Imagenet large scale visual recognition challenge","author":"russakovsky","year":"2014","journal-title":"CoRR abs\/1409 0575"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/VSPETS.2005.1570899"},{"key":"ref27","doi-asserted-by":"crossref","first-page":"577","DOI":"10.1007\/978-3-642-15549-9_42","article-title":"Tracklet descriptors for action modeling and video analysis","author":"raptis","year":"2010","journal-title":"Computer Vision-ECCV 2010"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2014.2300479"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2012.231"},{"key":"ref29","article-title":"Two-stream convolutional networks for action recognition in videos","author":"simonyan","year":"2014","journal-title":"CoRR abs\/1406 2199"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2011.265"},{"key":"ref8","doi-asserted-by":"crossref","first-page":"345","DOI":"10.1007\/978-3-319-10584-0_23","article-title":"Learning rich features from rgb-d images for object detection and segmentation","volume":"8695","author":"gupta","year":"2014","journal-title":"Computer Vision - ECCV 2014"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.81"},{"key":"ref2","first-page":"158","article-title":"Detecting actions, poses, and objects with relational 
phraselets","author":"desai","year":"2012","journal-title":"Computer Vision-ECCV"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2006.876289"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995491"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.329"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/MMUL.2006.63"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995631"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1023\/A:1011139631724"},{"key":"ref41","doi-asserted-by":"crossref","first-page":"391","DOI":"10.1007\/978-3-319-10602-1_26","article-title":"Edge boxes: Locating object proposals from edges","volume":"8693","author":"zitnick","year":"2014","journal-title":"Computer Vision - ECCV 2014"},{"key":"ref23","first-page":"689","article-title":"Multimodal deep learning","author":"ngiam","year":"2011","journal-title":"Proceedings of the 28th International Conference on Machine Learning (ICML-11)"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126383"},{"key":"ref25","article-title":"Trecvid 2014- an overview of the goals, tasks, data, evaluation mechanisms and metrics","author":"over","year":"2014","journal-title":"Proceedings of TRECVID 2014"}],"event":{"name":"2015 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","start":{"date-parts":[[2015,6,7]]},"location":"Boston, MA, USA","end":{"date-parts":[[2015,6,12]]}},"container-title":["2015 IEEE Conference on Computer Vision and Pattern Recognition 
(CVPR)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7293313\/7298593\/07298768.pdf?arnumber=7298768","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,15]],"date-time":"2023-08-15T11:37:01Z","timestamp":1692099421000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7298768\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,6]]},"references-count":41,"URL":"https:\/\/doi.org\/10.1109\/cvpr.2015.7298768","relation":{},"subject":[],"published":{"date-parts":[[2015,6]]}}}