{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,20]],"date-time":"2025-10-20T10:15:08Z","timestamp":1760955308522},"reference-count":65,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"1","license":[{"start":{"date-parts":[[2012,2,1]],"date-time":"2012-02-01T00:00:00Z","timestamp":1328054400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Multimedia"],"published-print":{"date-parts":[[2012,2]]},"DOI":"10.1109\/tmm.2011.2168948","type":"journal-article","created":{"date-parts":[[2011,9,28]],"date-time":"2011-09-28T03:28:36Z","timestamp":1317180516000},"page":"88-101","source":"Crossref","is-referenced-by-count":116,"title":["Semantic Model Vectors for Complex Video Event Recognition"],"prefix":"10.1109","volume":"14","author":[{"given":"Michele","family":"Merler","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bert","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lexing","family":"Xie","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gang","family":"Hua","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Apostol","family":"Natsev","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2009.5459361"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2007.383487"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1023\/A:1011139631724"},{"key":"ref32","first-page":"527","article-title":"Extracting moving people from internet videos","author":"niebles","year":"2008","journal-title":"Proc Eur Conf Computer Vision (ECCV)"},{"key":"ref31","article-title":"IBM research TRECVID-2009 video retrieval system","author":"natsev","year":"2009","journal-title":"Proc NIST TRECVID Workshop"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/1014052.1014133"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2009.11.014"},{"key":"ref36","author":"roth","year":"2008","journal-title":"Survey of Appearance-Based Methods for Object Recognition"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206537"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2010.5539871"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2010.5540234"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2006.194"},{"key":"ref61","first-page":"1321","article-title":"Spatio-temporal event detection using dynamic conditional random fields","author":"yin","year":"2009","journal-title":"Proc 4th Int Joint Conf Artificial Intell (IJCAI)"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2005.316"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1023\/B:VISI.0000029664.99615.94"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1145\/1459359.1459391"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/1290082.1290117"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1145\/1631272.1631297"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206557"},{"key":"ref2","year":"2010","journal-title":"Great Scott! Over 35 Hours of Video Uploaded Every Minute to Youtube"},{"key":"ref1","year":"2010","journal-title":"Cisco visual networking index Forecast and methodology 20092014"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/1991996.1992025"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2008.4587756"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2010.5539869"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2007.4408872"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-005-1838-7"},{"key":"ref26","first-page":"1996","article-title":"Recognizing realistic actions from videos in the wild","author":"liu","year":"2009","journal-title":"Proc Conf Comput Vision Pattern Recognition (CVPR)"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2010.5543273"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1145\/1459359.1459392"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.5244\/C.23.124"},{"key":"ref59","author":"yanagawa","year":"2007","journal-title":"Columbia University's Baseline Detectors for 374 LSCOM Semantic Visual Concepts"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1145\/1281192.1281281"},{"key":"ref57","doi-asserted-by":"crossref","first-page":"1985","DOI":"10.1109\/TPAMI.2008.129","article-title":"Video event recognition using Kernel methods with multilevel temporal alignment","volume":"30","author":"xu","year":"2008","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2010.5539970"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2007.4408865"},{"key":"ref54","first-page":"650","article-title":"An efficient dense and scale-invariant saptio-temporal interest point detector","author":"willems","year":"2008","journal-title":"Proc Eur Conf Computer Vision (ECCV)"},{"key":"ref53","first-page":"109","article-title":"Semantic event detection using conditional random fields","author":"wang","year":"2006","journal-title":"Proc Computer Vision and Pattern Recognition (CVPR)"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1145\/1290082.1290113"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2006.262691"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2010.02.004"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2008.4562960"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2007.383331"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2007.04.002"},{"key":"ref14","first-page":"128","article-title":"Action detection in complex scenes with spatial and temporal ambiguities","author":"hu","year":"2009","journal-title":"Proc Int Conf Computer Vision (ICCV)"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-12307-8_17"},{"key":"ref16","article-title":"Enhanced biologically inspired model","author":"huang","year":"2008","journal-title":"Proc Computer Vision and Pattern Recognition"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2009.5459368"},{"key":"ref18","article-title":"Object, scene and actions: Combining multiple features for human action recognition","author":"ikizler-cinbis","year":"2010","journal-title":"Proc Eur Conf Computer Vision (ECCV)"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/1631272.1631277"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-009-0351-3"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-009-0342-4"},{"key":"ref6","article-title":"Ontologies for video events","author":"bremond","year":"2004","journal-title":"Research Report Number 5189"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-006-0009-9"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/VSPETS.2005.1570899"},{"key":"ref7","article-title":"IBM research TRECVID-2007 video retrieval system","author":"campbell","year":"2007","journal-title":"Proc NIST TRECVID Workshop"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2009.08.004"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2010.5539870"},{"key":"ref46","first-page":"2004","article-title":"Hierarchical spatio-temporal context modeling for action recognition","author":"sun","year":"2009","journal-title":"Proc Conf Comput Vision Pattern Recognition (CVPR)"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2009.2032939"},{"key":"ref48","article-title":"Evaluating color descriptors for object and scene recognition","author":"van de sande","year":"0","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"ref47","first-page":"776","article-title":"Efficient object category recognition using classemes","author":"torresani","year":"2010","journal-title":"Proc Eur Conf Computer Vision (ECCV)"},{"key":"ref42","article-title":"Contextualized eventdriven prediction with ontology-based similarity","author":"sen","year":"2009","journal-title":"Proc AAAI Spring Symp Intelligent Event Processing"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2004.1334462"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2003.1221649"},{"key":"ref43","first-page":"151","article-title":"High level feature detection from video in trecvid: A 5-year retrospective of achievements","author":"smeaton","year":"2009","journal-title":"Multimedia Content Analysis"}],"container-title":["IEEE Transactions on Multimedia"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/6046\/6130620\/06024471.pdf?arnumber=6024471","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,10,10]],"date-time":"2021-10-10T23:47:35Z","timestamp":1633909655000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6024471\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,2]]},"references-count":65,"journal-issue":{"issue":"1"},"URL":"https:\/\/doi.org\/10.1109\/tmm.2011.2168948","relation":{},"ISSN":["1520-9210","1941-0077"],"issn-type":[{"value":"1520-9210","type":"print"},{"value":"1941-0077","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012,2]]}}}