{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T14:58:02Z","timestamp":1775055482952,"version":"3.50.1"},"reference-count":113,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2010,11,10]],"date-time":"2010-11-10T00:00:00Z","timestamp":1289347200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2011,1]]},"DOI":"10.1007\/s11042-010-0643-7","type":"journal-article","created":{"date-parts":[[2010,11,9]],"date-time":"2010-11-09T02:59:36Z","timestamp":1289271576000},"page":"279-302","source":"Crossref","is-referenced-by-count":111,"title":["Event detection and recognition for semantic annotation of video"],"prefix":"10.1007","volume":"51","author":[{"given":"Lamberto","family":"Ballan","sequence":"first","affiliation":[]},{"given":"Marco","family":"Bertini","sequence":"additional","affiliation":[]},{"given":"Alberto","family":"Del Bimbo","sequence":"additional","affiliation":[]},{"given":"Lorenzo","family":"Seidenari","sequence":"additional","affiliation":[]},{"given":"Giuseppe","family":"Serra","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2010,11,10]]},"reference":[{"key":"643_CR1","doi-asserted-by":"crossref","unstructured":"Akdemir U, Turaga P, Chellappa R (2008) An ontology based approach for activity recognition from video. In: Proc. of ACM multimedia (MM)","DOI":"10.1145\/1459359.1459466"},{"key":"643_CR2","doi-asserted-by":"crossref","unstructured":"Arndt R, Troncy R, Staab S, Hardman L, Vacura M (2007) Comm: designing a well-founded multimedia ontology for the web. In: Proc. of int\u2019l semantic web conference","DOI":"10.1007\/978-3-540-76298-0_3"},{"key":"643_CR3","doi-asserted-by":"crossref","unstructured":"Artikis A, Sergot M, Paliouras G (2010) A logic programming approach to activity recognition. In: Proc. of ACM int\u2019l workshop on events in multimedia","DOI":"10.1145\/1877937.1877941"},{"key":"643_CR4","unstructured":"Assfalg J, Bertini M, Del Bimbo A, Nunziati W, Pala P (2002) Soccer highlights detection and recognition using HMMs. In: Proc. of int\u2019l conference on multimedia & expo (ICME)"},{"issue":"2\u20133","key":"643_CR5","doi-asserted-by":"crossref","first-page":"285","DOI":"10.1016\/j.cviu.2003.06.004","volume":"92","author":"J Assfalg","year":"2003","unstructured":"Assfalg J, Bertini M, Colombo C, Del Bimbo A, Nunziati W (2003) Semantic annotation of soccer videos: automatic highlights identification. Comput Vis Image Underst 92(2\u20133):285\u2013305","journal-title":"Comput Vis Image Underst"},{"key":"643_CR6","doi-asserted-by":"crossref","unstructured":"Bai L, Lao S, Jones G, Smeaton AF (2007) Video semantic content analysis based on ontology. In: Proc. of int\u2019l machine vision and image processing conference","DOI":"10.1109\/IMVIP.2007.13"},{"key":"643_CR7","doi-asserted-by":"crossref","unstructured":"Bai L, Lao S, Zhang W, Jones G, Smeaton A (2007) A semantic event detection approach for soccer video based on perception concepts and finite state machines. In: Proc. intl\u2019l workshop on image analysis for multimedia interactive services (WIAMIS)","DOI":"10.1109\/WIAMIS.2007.12"},{"key":"643_CR8","doi-asserted-by":"crossref","unstructured":"Ballan L, Bertini M, Del Bimbo A, Seidenari L, Serra G (2009) Recognizing human actions by fusing spatio-temporal appearance and motion descriptors. In: Proc. of int\u2019l conference on image processing (ICIP). Cairo, Egypt","DOI":"10.1109\/ICIP.2009.5414332"},{"issue":"2","key":"643_CR9","doi-asserted-by":"crossref","first-page":"313","DOI":"10.1007\/s11042-009-0342-4","volume":"48","author":"L Ballan","year":"2010","unstructured":"Ballan L, Bertini M, Del Bimbo A, Serra G (2010) Semantic annotation of soccer videos by visual instance clustering and spatial\/temporal reasoning in ontologies. Multimed Tools Appl 48(2):313\u2013337","journal-title":"Multimed Tools Appl"},{"issue":"1","key":"643_CR10","doi-asserted-by":"crossref","first-page":"69","DOI":"10.1007\/s11042-009-0351-3","volume":"48","author":"L Ballan","year":"2010","unstructured":"Ballan L, Bertini M, Del Bimbo A, Serra G (2010) Video event classification using string kernels. Multimed Tools Appl 48(1):69\u201387","journal-title":"Multimed Tools Appl"},{"key":"643_CR11","author":"L Ballan","year":"2010","unstructured":"Ballan L, Bertini M, Del Bimbo A, Serra G (2010) Video annotation and retrieval using ontologies and rule learning. IEEE Multimed doi: 10.1109\/MMUL.2004.4","journal-title":"IEEE Multimed"},{"issue":"3","key":"643_CR12","doi-asserted-by":"crossref","first-page":"360","DOI":"10.1016\/j.cviu.2007.09.016","volume":"110","author":"A Basharat","year":"2008","unstructured":"Basharat A, Zhai Y, Shah M (2008) Content based video matching using spatiotemporal volumes. Comput Vis Image Underst 110(3):360\u2013377","journal-title":"Comput Vis Image Underst"},{"issue":"3","key":"643_CR13","doi-asserted-by":"crossref","first-page":"346","DOI":"10.1016\/j.cviu.2007.09.014","volume":"110","author":"H Bay","year":"2008","unstructured":"Bay H, Ess A, Tuytelaars T, Van Gool L (2008) SURF: speeded up robust features. Comput Vis Image Underst 110(3):346\u2013359","journal-title":"Comput Vis Image Underst"},{"issue":"2","key":"643_CR14","doi-asserted-by":"crossref","first-page":"215","DOI":"10.1007\/s11042-005-2575-1","volume":"27","author":"M Bertini","year":"2005","unstructured":"Bertini M, Del Bimbo A, Nunziati W (2005) Common visual cues for sports highlights modeling. Multimed Tools Appl 27(2):215\u2013218","journal-title":"Multimed Tools Appl"},{"key":"643_CR15","doi-asserted-by":"crossref","unstructured":"Bertini M, Del Bimbo A, Torniai C, Cucchiara R, Grana C (2007) Dynamic pictorial ontologies for video digital libraries annotation. In: Proc. of ACM int\u2019l workshop on many faces of multimedia semantics (MS)","DOI":"10.1145\/1290067.1290076"},{"key":"643_CR16","doi-asserted-by":"crossref","unstructured":"Bertini M, Del Bimbo A, Serra G (2008) Learning ontology rules for semantic video annotation. In: Proc. of ACM int\u2019l workshop on many faces of multimedia semantics (MS)","DOI":"10.1145\/1460676.1460678"},{"key":"643_CR17","doi-asserted-by":"crossref","unstructured":"Bloehdorn S, Petridis K, Saathoff C, Simou N, Tzouvaras V, Avrithis Y, Handschuh S, Kompatsiaris I, Staab S, Strintzis M (2005) Semantic annotation of images and videos for multimedia analysis. In: Proc. of European semantic web conference","DOI":"10.1007\/11431053_40"},{"issue":"8","key":"643_CR18","doi-asserted-by":"crossref","first-page":"844","DOI":"10.1109\/34.868685","volume":"22","author":"M Brand","year":"2000","unstructured":"Brand M, Kettnaker V (2000) Discovery and segmentation of activities in video. IEEE Trans Pattern Anal Mach Intell 22(8):844\u2013851","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"3","key":"643_CR19","doi-asserted-by":"crossref","first-page":"416","DOI":"10.1109\/TSMCC.2008.919173","volume":"38","author":"D Brezeale","year":"2008","unstructured":"Brezeale D, Cook D (2008) Automatic video classification: a survey of the literature. IEEE Trans Syst Man Cybern 38(3):416\u2013430","journal-title":"IEEE Trans Syst Man Cybern"},{"key":"643_CR20","unstructured":"Chao C, Shih HC, Huang CL (2005) Semantics-based highlight extraction of soccer program using DBN. In: Proc. of int\u2019l conference on acoustics, speech, and signal processing (ICASSP)"},{"key":"643_CR21","doi-asserted-by":"crossref","unstructured":"Chen D, Yang J, Wactlar HD (2004) Towards automatic analysis of social interaction patterns in a nursing home environment from video. In: Proc. of int\u2019l workshop on multimedia information retrieval (MIR)","DOI":"10.1145\/1026711.1026757"},{"key":"643_CR22","unstructured":"Chen M, Hauptmann A, Li H (2009) Informedia @ TRECVID2009: analyzing video motions. In: Proc. of the TRECVID workshop"},{"issue":"10","key":"643_CR23","doi-asserted-by":"crossref","first-page":"1210","DOI":"10.1109\/TCSVT.2005.854238","volume":"15","author":"S Dasiopoulou","year":"2005","unstructured":"Dasiopoulou S, Mezaris V, Kompatsiaris I, Papastathis VK, Strintzis MG (2005) Knowledge-assisted semantic video object detection. IEEE Trans Circuits Syst Video Technol 15(10):1210\u20131224","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"643_CR24","unstructured":"Dasiopoulou S, Saathoff C, Mylonas P, Avrithis Y, Kompatsiaris Y, Staab S, Strintzis M (2008) Semantic multimedia and ontologies theory and applications, chapter introducing context and reasoning in visual content analysis: an ontology-based framework. Springer, pp 99\u2013122"},{"key":"643_CR25","unstructured":"Dollar P, Rabaud V, Cottrell G, Belongie S (2005) Behavior recognition via sparse spatio-temporal features. In: Proc. of int\u2019l workshop on visual surveillance and performance evaluation of tracking and surveillance (VS-PETS)"},{"key":"643_CR26","unstructured":"Dousson C, Le Maigat P (2007) Chronicle recognition improvement using temporal focusing and hierarchization. In: Proc. of int\u2019l joint conference on artificial intelligence"},{"key":"643_CR27","unstructured":"Dublin Core Metadata Initiative. http:\/\/dublincore.org\/ . Accessed 11 October 2010"},{"key":"643_CR28","doi-asserted-by":"crossref","unstructured":"Ebadollahi S, Xie L, Chang SF, Smith J (2006) Visual event detection using multi-dimensional concept dynamics. In: Proc. of int\u2019l conference on multimedia & expo (ICME)","DOI":"10.1109\/ICME.2006.262691"},{"key":"643_CR29","doi-asserted-by":"crossref","unstructured":"Fathi A, Mori G (2008) Action recognition by learning mid-level motion features. In: Proc. of int\u2019l conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR.2008.4587735"},{"key":"643_CR30","series-title":"A semantic network of English verbs","doi-asserted-by":"crossref","DOI":"10.7551\/mitpress\/7287.001.0001","volume-title":"WordNet: an electronic lexical database, chap 3","author":"C Fellbaum","year":"1998","unstructured":"Fellbaum C (1998) WordNet: an electronic lexical database, chap 3. A semantic network of English verbs. MIT, Cambridge"},{"key":"643_CR31","unstructured":"Fergus R, Perona P, Zisserman A (2003) Object class recognition by unsupervised scale-invariant learning. In: Proc. of int\u2019l conference on computer vision and pattern recognition (CVPR)"},{"key":"643_CR32","unstructured":"Fihl P, Holte M, Moeslund T (2007) Motion primitives for action recognition. In: Proc. of int\u2019l workshop on gesture in human-computer interaction and simulation"},{"issue":"4","key":"643_CR33","doi-asserted-by":"crossref","first-page":"76","DOI":"10.1109\/MMUL.2005.87","volume":"12","author":"A Francois","year":"2005","unstructured":"Francois A, Nevatia R, Hobbs J, Bolles R, Smith J (2005) VERL: an ontology framework for representing and annotating video events. IEEE Multimed 12(4):76\u201386","journal-title":"IEEE Multimed"},{"key":"643_CR34","unstructured":"Garcia R, Celma O (2005) Semantic integration and retrieval of multimedia metadata. In: Proc. of the knowledge markup and semantic annotation workshop"},{"key":"643_CR35","doi-asserted-by":"crossref","unstructured":"Georis B, Mazi\u00e8re M, Br\u00e9mond F, Thonnat M (2004) A video interpretation platform applied to bank agency monitoring. In: Proc. of intelligent distributed surveillance systems workshop","DOI":"10.1049\/ic:20040097"},{"issue":"5\u20136","key":"643_CR36","doi-asserted-by":"crossref","first-page":"907","DOI":"10.1006\/ijhc.1995.1081","volume":"43","author":"T Gruber","year":"1995","unstructured":"Gruber T (1995) Principles for the design of ontologies used for knowledge sharing. Int J Human-comput Stud 43(5\u20136):907\u2013928","journal-title":"Int J Human-comput Stud"},{"key":"643_CR37","doi-asserted-by":"crossref","unstructured":"Hakeem A, Shah M (2004) Ontology and taxonomy collaborated framework for meeting classification. In: Proc. of int\u2019l conference on pattern recognition (ICPR)","DOI":"10.1109\/ICPR.2004.1333743"},{"key":"643_CR38","first-page":"1","volume":"2009","author":"N Harte","year":"2009","unstructured":"Harte N, Lennon D, Kokaram A (2009) On parsing visual sequences with the hidden Markov model. EURASIP JIVP 2009:1\u201313","journal-title":"EURASIP JIVP"},{"key":"643_CR39","doi-asserted-by":"crossref","unstructured":"Haubold A, Naphade M (2007) Classification of video events using 4-dimensional time-compressed motion features. In: Proc. of ACM international conference on image and video retrieval (CIVR), pp 178\u2013185","DOI":"10.1145\/1282280.1282311"},{"key":"643_CR40","doi-asserted-by":"crossref","unstructured":"Hollink L, Little S, Hunter J (2005) Evaluating the application of semantic inferencing rules to image annotation. In: Proc. of int\u2019l conference on knowledge capture","DOI":"10.1145\/1088622.1088639"},{"key":"643_CR41","author":"H Jhuang","year":"2010","unstructured":"Jhuang H, Garrote E, Yu X, Khilnani V, Poggio T, Steele A, Serre T (2010) Automated home-cage behavioral phenotyping of mice. Nature communications doi: 10.1038\/ncomms.1064","journal-title":"Nature communications"},{"issue":"2","key":"643_CR42","doi-asserted-by":"crossref","first-page":"83","DOI":"10.1023\/A:1012460413855","volume":"45","author":"T Kadir","year":"2001","unstructured":"Kadir T, Brady M (2001) Saliency, scale and image description. Int J Comput Vis 45(2):83\u2013105","journal-title":"Int J Comput Vis"},{"issue":"9","key":"643_CR43","first-page":"1163","volume":"13","author":"A Kale","year":"2004","unstructured":"Kale A, Sundaresan A, Rajagopalan AN, Cuntoor NP, Roy-Chowdhury AK, Kruger V, Chellappa R (2004) Identification of humans using gait. IEEE Trans Knowl Data Eng 13(9):1163\u20131173","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"643_CR44","unstructured":"Kennedy L (2006) Revision of LSCOM event\/activity annotations, DTO challenge workshop on large scale concept ontology for multimedia. Advent technical report #221-2006-7, Columbia University"},{"key":"643_CR45","unstructured":"Kienzle W, Scholkopf B, Wichmann F, Franz MO (2007) How to find interesting locations in video: a spatiotemporal interest point detector learned from human eye movements. In: Proc. of 29th annual symposium of the german association for pattern recognition. Springer"},{"key":"643_CR46","doi-asserted-by":"crossref","unstructured":"Kl\u00e4ser A, Marsza\u0142ek M, Schmid C (2008) A spatio-temporal descriptor based on 3D-Gradients. In: Proc. of British machine vision conference (BMVC)","DOI":"10.5244\/C.22.99"},{"key":"643_CR47","doi-asserted-by":"crossref","unstructured":"Ko T (2008) A survey on behavior analysis in video surveillance for homeland security applications. In: 37th IEEE applied imagery pattern recognition workshop, pp 1\u20138","DOI":"10.1109\/AIPR.2008.4906450"},{"key":"643_CR48","doi-asserted-by":"crossref","unstructured":"Kompatsiaris Y, Hobson P (2008) Semantic multimedia and ontologies: theory and applications. Springer","DOI":"10.1007\/978-1-84800-076-6"},{"issue":"1","key":"643_CR49","doi-asserted-by":"crossref","first-page":"67","DOI":"10.1007\/BF03037383","volume":"4","author":"R Kowalski","year":"1986","unstructured":"Kowalski R, Sergot M (1986) A logic-based calculus of events. New Gener Comput 4(1):67\u201395","journal-title":"New Gener Comput"},{"key":"643_CR50","doi-asserted-by":"crossref","unstructured":"Kuettel D, Breitenstein MD, Van Gool L, Ferrari V (2010) What\u2019s going on? discovering spatio-temporal dependencies in dynamic scenes. In: Proc. of int\u2019l conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR.2010.5539869"},{"key":"643_CR51","doi-asserted-by":"crossref","unstructured":"Laptev I, Lindeberg T (2003) Space-time interest points. In: Proc. of int\u2019l conference on computer vision (ICCV)","DOI":"10.1109\/ICCV.2003.1238378"},{"issue":"2\u20133","key":"643_CR52","doi-asserted-by":"crossref","first-page":"107","DOI":"10.1007\/s11263-005-1838-7","volume":"64","author":"I Laptev","year":"2005","unstructured":"Laptev I (2005) On space-time interest points. Int J Comput Vis 64(2\u20133):107\u2013123","journal-title":"Int J Comput Vis"},{"key":"643_CR53","doi-asserted-by":"crossref","unstructured":"Laptev I, Perez P (2007) Retrieving actions in movies. In: Proc. of int\u2019l conference on computer vision (ICCV)","DOI":"10.1109\/ICCV.2007.4409105"},{"key":"643_CR54","doi-asserted-by":"crossref","unstructured":"Laptev I, Marszalek M, Schmid C, Rozenfeld B (2008) Learning realistic human actions from movies. In: Proc. of int\u2019l conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR.2008.4587756"},{"key":"643_CR55","doi-asserted-by":"crossref","unstructured":"Lavee G, Borzin A, Rivlin E, Rudzsky M (2007) Building Petri nets from video event ontologies. In: Proc. of international symposium on visual computing (ISVC). LNCS, vol 4841. Springer Verlag, pp 442\u2013451","DOI":"10.1007\/978-3-540-76858-6_44"},{"issue":"5","key":"643_CR56","doi-asserted-by":"crossref","first-page":"489","DOI":"10.1109\/TSMCC.2009.2023380","volume":"39","author":"G Lavee","year":"2009","unstructured":"Lavee G, Rivlin E, Rudzsky M (2009) Understanding video events: a survey of methods for automatic interpretation of semantic occurrences in video. IEEE Trans Syst Man Cybern 39(5):489\u2013504","journal-title":"IEEE Trans Syst Man Cybern"},{"key":"643_CR57","unstructured":"Lazebnik S, Schmid C, Ponce J (2006) Beyond bags of features: spatial pyramid matching for recognizing natural scene categories. In: Proc. of int\u2019l conference on computer vision and pattern recognition (CVPR)"},{"key":"643_CR58","doi-asserted-by":"crossref","unstructured":"Leslie L, Chua TS, Ramesh J (2007) Annotation of paintings with high-level semantic concepts using transductive inference and ontology-based concept disambiguation. In: Proc. of ACM multimedia (MM)","DOI":"10.1145\/1291233.1291335"},{"key":"643_CR59","unstructured":"Liu J, Shah M (2008) Learning human actions via information maximization. In: Proc. of int\u2019l conference on computer vision and pattern recognition (CVPR)"},{"key":"643_CR60","doi-asserted-by":"crossref","unstructured":"Liu J, Luo J, Shah M (2009) Recognizing realistic actions from videos \u201cin the wild\u201d. In: Proc. of int\u2019l conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR.2009.5206744"},{"issue":"2","key":"643_CR61","doi-asserted-by":"crossref","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","volume":"60","author":"DG Lowe","year":"2004","unstructured":"Lowe DG (2004) Distinctive image features from scale-invariant keypoints. Int J Comput Vis 60(2):91\u2013110","journal-title":"Int J Comput Vis"},{"key":"643_CR62","unstructured":"Luo M, Ma YF, Zhang HJ (2003) Pyramidwise structuring for soccer highlight extraction. In: Proc. of ICICS-PCM"},{"key":"643_CR63","doi-asserted-by":"crossref","unstructured":"Mahadevan V, Li W, Bhalodia V, Vasconcelos N (2010) Anomaly detection in crowded scenes. In: Proc. of int\u2019l conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR.2010.5539872"},{"issue":"1","key":"643_CR64","doi-asserted-by":"crossref","first-page":"102","DOI":"10.1016\/j.imavis.2005.07.027","volume":"26","author":"N Maillot","year":"2008","unstructured":"Maillot N, Thonnat M (2008) Ontology based complex object recognition. Image Vis Comput 26(1):102\u2013113","journal-title":"Image Vis Comput"},{"key":"643_CR65","doi-asserted-by":"crossref","unstructured":"Marszalek M, Laptev I, Schmid C (2009) Actions in context. In: Proc. of int\u2019l conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPRW.2009.5206557"},{"key":"643_CR66","doi-asserted-by":"crossref","unstructured":"Mehran R, Moore B, Shah M (2010) A streakline representation of flow in crowded scenes. In: Proc. of European conference on computer vision (ECCV)","DOI":"10.1007\/978-3-642-15558-1_32"},{"issue":"10","key":"643_CR67","doi-asserted-by":"crossref","first-page":"1615","DOI":"10.1109\/TPAMI.2005.188","volume":"27","author":"K Mikolajczyk","year":"2005","unstructured":"Mikolajczyk K, Schmid C (2005) A performance evaluation of local descriptors. IEEE Trans Pattern Anal Mach Intell 27(10):1615\u20131630","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"1\/2","key":"643_CR68","doi-asserted-by":"crossref","first-page":"43","DOI":"10.1007\/s11263-005-3848-x","volume":"65","author":"K Mikolajczyk","year":"2005","unstructured":"Mikolajczyk K, Tuytelaars T, Schmid C, Zisserman A, Matas J, Schaffalitzky F, Kadir T, Van Gool L (2005) A comparison of affine region detectors. Int J Comput Vis 65(1\/2):43\u201372","journal-title":"Int J Comput Vis"},{"key":"643_CR69","doi-asserted-by":"crossref","unstructured":"Mikolajczyk K, Uemura H (2008) Action recognition with motion-appearance vocabulary forest. In: Proc. of int\u2019l conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR.2008.4587628"},{"key":"643_CR70","unstructured":"Miller JA, Baramidze G (2005) Simulation and the semantic web. In: Proc. of the winter simulation conference (WSC)"},{"issue":"3","key":"643_CR71","doi-asserted-by":"crossref","first-page":"86","DOI":"10.1109\/MMUL.2006.63","volume":"13","author":"M Naphade","year":"2006","unstructured":"Naphade M, Smith J, Tesic J, Chang SF, Kennedy L, Hauptmann A, Curtis J (2006) Large-scale concept ontology for multimedia. IEEE Multimed 13(3):86\u201391","journal-title":"IEEE Multimed"},{"key":"643_CR72","doi-asserted-by":"crossref","unstructured":"Neumann B, Moeller R (2006) On scene interpretation with description logics. In: Cognitive vision systems: sampling the spectrum of approaches. Lecture notes in computer science, vol 3948. Springer, pp 247\u2013278","DOI":"10.1007\/11414353_15"},{"key":"643_CR73","unstructured":"Nevatia R, Hobbs J, Bolles B (2004) An ontology for video event representation. In: Proc. of the conference on computer vision and pattern recognition workshop (CVPRW)"},{"key":"643_CR74","doi-asserted-by":"crossref","unstructured":"Niebles J, Fei-Fei L (2007) A hierarchical model of shape and appearance for human action classification. In: Proc. of int\u2019l conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR.2007.383132"},{"key":"643_CR75","unstructured":"Nister D, Stewenius H (2006) Scalable recognition with a vocabulary tree. In: Proc. of int\u2019l conference on computer vision and pattern recognition (CVPR)"},{"key":"643_CR76","doi-asserted-by":"crossref","unstructured":"Nowak E, Jurie F, Triggs B (2006) Sampling strategies for bag-of-features image classification. In: Proc. of European conference on computer vision (ECCV)","DOI":"10.1007\/11744085_38"},{"key":"643_CR77","doi-asserted-by":"crossref","first-page":"719","DOI":"10.1109\/TSMCB.2005.861864","volume":"36","author":"A Oikonomopoulos","year":"2005","unstructured":"Oikonomopoulos A, Patras I, Pantic M (2005) Spatiotemporal salient points for visual recognition of human actions. IEEE Trans Syst Man Cybern 36:719","journal-title":"IEEE Trans Syst Man Cybern"},{"key":"643_CR78","unstructured":"Over P, Awad G, Fiscus J, Michel M, Smeaton AF, Kraaij W (2009) TRECVid 2009\u2013goals, tasks, data, evaluation mechanisms and metrics. In: Proc. of the TRECVID workshop. Gaithersburg, USA"},{"issue":"1","key":"643_CR79","doi-asserted-by":"crossref","first-page":"187","DOI":"10.1016\/j.dss.2008.06.008","volume":"46","author":"A Paschke","year":"2008","unstructured":"Paschke A, Bichler M (2008) Knowledge representation concepts for automated SLA management. Decis Support Syst 46(1):187\u2013205","journal-title":"Decis Support Syst"},{"key":"643_CR80","doi-asserted-by":"crossref","unstructured":"Pattanasri N, Jatowt A, Tanaka K (2006) Enhancing comprehension of events in video through explanation-on-demand hypervideo. In: Advances in multimedia modeling. Lecture notes in computer science, vol 4351. Springer, pp 535\u2013544","DOI":"10.1007\/978-3-540-69423-6_52"},{"issue":"6","key":"643_CR81","doi-asserted-by":"crossref","first-page":"976","DOI":"10.1016\/j.imavis.2009.11.014","volume":"28","author":"R Poppe","year":"2010","unstructured":"Poppe R (2010) A survey on vision-based human action recognition. Image Vis Comput 28(6):976\u2013990","journal-title":"Image Vis Comput"},{"issue":"10","key":"643_CR82","doi-asserted-by":"crossref","first-page":"1225","DOI":"10.1109\/TCSVT.2005.854237","volume":"15","author":"D Sadlier","year":"2005","unstructured":"Sadlier D, O\u2019Connor N (2005) Event detection in field sports video using audio\u2013visual features and a support vector machine. IEEE Trans Circuits Syst Video Technol 15(10):1225\u20131233","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"643_CR83","doi-asserted-by":"crossref","unstructured":"SanMiguel J, Martinez J, Garcia A (2009) An ontology for event detection and its application in surveillance video. In: Proc. of int\u2019l conference on advanced video and signal-based surveillance (AVSS)","DOI":"10.1109\/AVSS.2009.28"},{"key":"643_CR84","unstructured":"Savarese S, Winn J, Criminisi A (2006) Discriminative object class models of appearance and shape by correlatons. In: Proc. of int\u2019l conference on computer vision and pattern recognition (CVPR)"},{"key":"643_CR85","doi-asserted-by":"crossref","unstructured":"Savarese S, Del Pozo A, Niebles JC, Fei-Fei L (2008) Spatial-temporal correlatons for unsupervised action classification. In: Proc. of workshop on motion and video computing","DOI":"10.1109\/WMVC.2008.4544068"},{"key":"643_CR86","doi-asserted-by":"crossref","unstructured":"Scherp A, Franz T, Saathoff C, Staab S (2009) F\u2013a model of events based on the foundational ontology DOLCE+DnS ultralight. In: Proc. of int\u2019l conference on knowledge capture (K-CAP)","DOI":"10.1145\/1597735.1597760"},{"key":"643_CR87","doi-asserted-by":"crossref","unstructured":"Schuldt C, Laptev I, Caputo B (2004) Recognizing human actions: a local SVM approach. In: Proc. of int\u2019l conference on pattern recognition (ICPR)","DOI":"10.1109\/ICPR.2004.1334462"},{"key":"643_CR88","doi-asserted-by":"crossref","unstructured":"Scovanner P, Ali S, Shah M (2007) A 3-Dimensional SIFT descriptor and its application to action recognition. In: Proc. of ACM multimedia (MM)","DOI":"10.1145\/1291233.1291311"},{"key":"643_CR89","doi-asserted-by":"crossref","unstructured":"Seidenari L, Bertini M (2010) Non-parametric anomaly detection exploiting space-time features. In: Proc. of ACM multimedia (MM)","DOI":"10.1145\/1873951.1874170"},{"key":"643_CR90","unstructured":"Shet V, Harwood D, Davis L (2005) Vidmap: video monitoring of activity with prolog. In: Proc. of IEEE int\u2019l conference on advanced video and signal-based surveillance (AVSS)"},{"key":"643_CR91","doi-asserted-by":"crossref","unstructured":"Sivic J, Zisserman A (2003) Video google: a text retrieval approach to object matching in videos. In: Proc. of int\u2019l conference on computer vision (ICCV)","DOI":"10.1109\/ICCV.2003.1238663"},{"key":"643_CR92","doi-asserted-by":"crossref","unstructured":"Smeaton AF, Over P, Kraaij W (2006) Evaluation campaigns and TRECVid. In: Proc. of int\u2019l workshop on multimedia information retrieval (MIR)","DOI":"10.1145\/1178677.1178722"},{"key":"643_CR93","doi-asserted-by":"crossref","unstructured":"Snidaro L, Belluz M, Foresti G (2007) Domain knowledge for surveillance applications. In: Proc. of int\u2019l conference on information fusion","DOI":"10.1109\/ICIF.2007.4408118"},{"issue":"1","key":"643_CR94","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1023\/B:MTAP.0000046380.27575.a5","volume":"25","author":"C Snoek","year":"2005","unstructured":"Snoek C, Worring M (2005) Multimodal video indexing: A review of the state-of-the-art. Multimed Tools Appl 25(1):5\u201335","journal-title":"Multimed Tools Appl"},{"key":"643_CR95","doi-asserted-by":"crossref","unstructured":"Tran SD, Davis LS (2008) Event modeling and recognition using Markov logic networks. In: Proc. of European conference on computer vision (ECCV)","DOI":"10.1007\/978-3-540-88688-4_45"},{"issue":"3","key":"643_CR96","doi-asserted-by":"crossref","first-page":"299","DOI":"10.1007\/s11042-005-0894-x","volume":"26","author":"C Tsinaraki","year":"2005","unstructured":"Tsinaraki C, Polydoros P, Kazasis F, Christodoulakis S (2005) Ontology-based semantic indexing for MPEG-7 and TV-Anytime audiovisual content. Multimed Tools Appl 26(3):299\u2013325","journal-title":"Multimed Tools Appl"},{"key":"643_CR97","unstructured":"TV Anytime Forum. http:\/\/www.tv-anytime.org\/ . Accessed 11 October 2010"},{"issue":"2","key":"643_CR98","doi-asserted-by":"crossref","first-page":"359","DOI":"10.1007\/s11042-009-0402-9","volume":"50","author":"R Vezzani","year":"2010","unstructured":"Vezzani R, Cucchiara R (2010) Video surveillance online repository (ViSOR): an integrated framework. Multimed Tools Appl 50(2):359\u2013380. http:\/\/www.openvisor.org","journal-title":"Multimed Tools Appl"},{"key":"643_CR99","unstructured":"Viola PA, Jones MJ (2001) Rapid object detection using a boosted cascade of simple features. In: Proc. of int\u2019l conference on computer vision and pattern recognition (CVPR)"},{"key":"643_CR100","doi-asserted-by":"crossref","unstructured":"Wang Xj, Mamadgi S, Thekdi A, Kelliher A, Sundaram H (2007) Eventory\u2014an event based media repository. In: Proc of the int\u2019l conference on semantic computing (ICSC)","DOI":"10.1109\/ICSC.2007.70"},{"key":"643_CR101","doi-asserted-by":"crossref","unstructured":"Wang F, Jiang YG, Ngo CW (2008) Video event detection using motion relativity and visual relatedness. In: Proc. of ACM multimedia (MM)","DOI":"10.1145\/1459359.1459392"},{"key":"643_CR102","doi-asserted-by":"crossref","unstructured":"Willems G, Tuytelaars T, Van Gool L (2008) An efficient dense and scale-invariant spatio-temporal interest point detector. In: Proc. of European conference on computer vision (ECCV)","DOI":"10.1007\/978-3-540-88688-4_48"},{"key":"643_CR103","doi-asserted-by":"crossref","unstructured":"Winder SAJ, Hua G, Brown M (2009) Picking the best DAISY. In: Proc. of int\u2019l conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR.2009.5206839"},{"key":"643_CR104","doi-asserted-by":"crossref","unstructured":"Wong SF, Cipolla R (2007) Extracting spatiotemporal interest points using global information. In: Proc. of int\u2019l conference on computer vision (ICCV)","DOI":"10.1109\/ICCV.2007.4408923"},{"key":"643_CR105","doi-asserted-by":"crossref","unstructured":"Wong SF, Kim TK, Cipolla R (2007) Learning motion categories using both semantic and structural information. In: Proc. of int\u2019l conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR.2007.383332"},{"issue":"11","key":"643_CR106","doi-asserted-by":"crossref","first-page":"1985","DOI":"10.1109\/TPAMI.2008.129","volume":"30","author":"D Xu","year":"2008","unstructured":"Xu D, Chang SF (2008) Video event recognition using kernel methods with multilevel temporal alignment. IEEE Trans Pattern Anal Mach Intell 30(11):1985\u20131997","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"643_CR107","unstructured":"Xu P, Xie L, Chang SF, Divakaran A, Vetro A, Sun H (2001) Algorithms and system for segmentation and structure analysis in soccer video. In: Proc. of int\u2019l conference on multimedia & expo (ICME)"},{"key":"643_CR108","unstructured":"Xu G, Ma YF, Zhang HJ, Yang S (2003) A HMM based semantic analysis framework for sports game event detection. In: Proc. of IEEE int\u2019l conference on image processing (ICIP). Barcelona, Spain"},{"key":"643_CR109","doi-asserted-by":"crossref","unstructured":"Yang J, Hauptmann AG (2006) Exploring temporal consistency for video analysis and retrieval. In: Proc. of int\u2019l workshop on multimedia information retrieval (MIR)","DOI":"10.1145\/1178677.1178685"},{"key":"643_CR110","doi-asserted-by":"crossref","unstructured":"Yang J, Jiang YG, Hauptmann AG, Ngo CW (2007) Evaluating bag-of-visual-words representations in scene classification. In: Proc. of int\u2019l workshop on multimedia information retrieval (MIR)","DOI":"10.1145\/1290082.1290111"},{"key":"643_CR111","doi-asserted-by":"crossref","first-page":"345","DOI":"10.1007\/s00138-008-0132-4","volume":"19","author":"B Zhan","year":"2008","unstructured":"Zhan B, Monekosso D, Remagnino P, Velastin S, Xu LQ (2008) Crowd analysis: a survey. Mach Vis Appl 19:345\u2013357","journal-title":"Mach Vis Appl"},{"issue":"2","key":"643_CR112","doi-asserted-by":"crossref","first-page":"213","DOI":"10.1007\/s11263-006-9794-4","volume":"73","author":"J Zhang","year":"2007","unstructured":"Zhang J, Marsza\u0142ek M, Lazebnik S, Schmid C (2007) Local features and kernels for classification of texture and object categories: a comprehensive study. Int J Comput Vis 73(2):213\u2013238","journal-title":"Int J Comput Vis"},{"key":"643_CR113","doi-asserted-by":"crossref","unstructured":"Zhou X, Zhuang X, Yan S, Chang SF, Hasegawa-Johnson M, Huang T (2008) SIFT-bag kernel for video event analysis. In: Proc. of ACM multimedia (MM), pp 229\u2013238","DOI":"10.1145\/1459359.1459391"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-010-0643-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11042-010-0643-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-010-0643-7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,6,6]],"date-time":"2019-06-06T00:24:32Z","timestamp":1559780672000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11042-010-0643-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010,11,10]]},"references-count":113,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2011,1]]}},"alternative-id":["643"],"URL":"https:\/\/doi.org\/10.1007\/s11042-010-0643-7","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"value":"1380-7501","type":"print"},{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2010,11,10]]}}}