{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,6]],"date-time":"2024-09-06T07:49:32Z","timestamp":1725608972277},"reference-count":53,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,7]]},"DOI":"10.1109\/cvpr.2017.225","type":"proceedings-article","created":{"date-parts":[[2017,11,9]],"date-time":"2017-11-09T16:50:33Z","timestamp":1510246233000},"page":"2087-2096","source":"Crossref","is-referenced-by-count":5,"title":["Unified Embedding and Metric Learning for Zero-Exemplar Event Detection"],"prefix":"10.1109","author":[{"given":"Noureldien","family":"Hussein","sequence":"first","affiliation":[]},{"given":"Efstratios","family":"Gavves","sequence":"additional","affiliation":[]},{"given":"Arnold W. M.","family":"Smeulders","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.5244\/C.27.8"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.106"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/2964284.2970377"},{"journal-title":"Audio event detection using weakly labeled data","year":"2016","author":"kumar","key":"ref32"},{"journal-title":"Voice Recognition Algorithms Using Mel Frequency Cepstral Coefficient (MFCC) and Dynamic Time Warping (DTW) Techniques","year":"2010","author":"muda","key":"ref31"},{"key":"ref30","article-title":"Rank pooling for action recognition","author":"fernando","year":"2016","journal-title":"IEEE TPAMI"},{"journal-title":"Very Deep Convolutional Networks for Large-scale Image Recognition","year":"2014","author":"simonyan","key":"ref37"},{"key":"ref36","article-title":"Imagenet classification with deep convolutional neural networks","author":"krizhevsky","year":"2012","journal-title":"NIPS"},{"journal-title":"Deep residual learning for image recognition","year":"2015","author":"he","key":"ref35"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/5.726791"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995407"},{"key":"ref27","article-title":"Two-stream convolutional networks for action recognition in videos","author":"simonyan","year":"2014","journal-title":"NIPS"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/s13735-014-0069-5"},{"key":"ref2","article-title":"Trecvid 2014-an overview of the goals, tasks, data, evaluation mechanisms and metrics","author":"over","year":"2014","journal-title":"TRECVID Workshop"},{"key":"ref1","article-title":"Trecvid 2013-an introduction to the goals, tasks, data, evaluation mechanisms, and metrics","author":"over","year":"2013","journal-title":"TRECVID Workshop"},{"journal-title":"Zero-shot event detection by multimodal distributional semantic embedding of videos","year":"2015","author":"elhoseiny","key":"ref20"},{"key":"ref22","article-title":"Distributed representations of words and phrases and their compositionality","author":"mikolov","year":"2013","journal-title":"NIPS"},{"journal-title":"Exploiting similarities among languages for machine translation","year":"2013","author":"mikolov","key":"ref21"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/2578726.2578764"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654918"},{"journal-title":"C3d generic features for video analysis","year":"2014","author":"tran","key":"ref26"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2016.7477706"},{"key":"ref50","article-title":"Skip-thought vectors","author":"kiros","year":"2015","journal-title":"NIPS"},{"key":"ref51","article-title":"Visualizing data using t-sne","author":"van der maaten","year":"2008","journal-title":"JMLR"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2670560"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1145\/1991996.1992025"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/2964284.2971480"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/2733373.2806237"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.207"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.313"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.313"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/2733373.2806221"},{"year":"0","key":"ref15"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654913"},{"key":"ref17","article-title":"Videostory embeddings recognize events when examples are scarce","author":"habibian","year":"2016","journal-title":"IEEE TPAMI"},{"key":"ref18","article-title":"Tagbook: A semantic video representation without supervision for event detection","author":"mazloom","year":"2015","journal-title":"IEEE TMM"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.341"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/2578726.2578746"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/2671188.2749399"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2014.2359771"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/2671188.2749403"},{"journal-title":"Dynamic concept composition for zero-example event detection","year":"2016","author":"chang","key":"ref8"},{"key":"ref7","article-title":"Semantic concept discovery for large-scale zero-shot event detection","author":"chang","year":"2015","journal-title":"IJCAI"},{"journal-title":"Wikipedia","year":"2016","key":"ref49"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.208"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2005.202"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1002\/(SICI)1097-4571(199009)41:6<391::AID-ASI1>3.0.CO;2-9"},{"journal-title":"Places An image database for deep scene understanding","year":"2016","author":"zhou","key":"ref48"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"ref42","article-title":"Sequence to sequence learning with neural networks","author":"sutskever","year":"2014","journal-title":"NIPS"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1145\/2671188.2749404"},{"key":"ref44","article-title":"Latent dirichlet allocation","author":"blei","year":"2003","journal-title":"JMLR"},{"key":"ref43","article-title":"Distributed representations of sentences and documents","author":"le","year":"2014","journal-title":"ICML"}],"event":{"name":"2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","start":{"date-parts":[[2017,7,21]]},"location":"Honolulu, HI","end":{"date-parts":[[2017,7,26]]}},"container-title":["2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8097368\/8099483\/08099708.pdf?arnumber=8099708","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,12,15]],"date-time":"2017-12-15T15:49:48Z","timestamp":1513352988000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/8099708\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,7]]},"references-count":53,"URL":"https:\/\/doi.org\/10.1109\/cvpr.2017.225","relation":{},"subject":[],"published":{"date-parts":[[2017,7]]}}}