{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T10:36:33Z","timestamp":1761561393284},"reference-count":72,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2009,12,23]],"date-time":"2009-12-23T00:00:00Z","timestamp":1261526400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2010,7]]},"DOI":"10.1007\/s11263-009-0308-z","type":"journal-article","created":{"date-parts":[[2009,12,22]],"date-time":"2009-12-22T16:12:06Z","timestamp":1261498326000},"page":"339-362","source":"Crossref","is-referenced-by-count":46,"title":["Volumetric Features for Video Event Detection"],"prefix":"10.1007","volume":"88","author":[{"given":"Yan","family":"Ke","sequence":"first","affiliation":[]},{"given":"Rahul","family":"Sukthankar","sequence":"additional","affiliation":[]},{"given":"Martial","family":"Hebert","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2009,12,23]]},"reference":[{"issue":"3","key":"308_CR1","doi-asserted-by":"crossref","first-page":"428","DOI":"10.1006\/cviu.1998.0744","volume":"73","author":"J. K. Aggarwal","year":"1999","unstructured":"Aggarwal, J. K., & Cai, Q. (1999). Human motion analysis: A\u00a0review. Computer Vision and Image Understanding, 73(3), 428\u2013440.","journal-title":"Computer Vision and Image Understanding"},{"key":"308_CR2","doi-asserted-by":"crossref","unstructured":"Ankerst, M., Kastenm\u00fcller, G., Kriegel, H.-P., & Seidl, T. (1999). 3D shape histograms for similarity search and classification in spatial databases. In Proceedings of international symposium of advances in spatial databases.","DOI":"10.1007\/3-540-48482-5_14"},{"key":"308_CR3","doi-asserted-by":"crossref","unstructured":"Arambel, P., Silver, J., Krant, J., Antone, M., & Strat, T. (2004). Multiple-hypothesis tracking of multiple ground targets from aerial video with dynamic sensor control. In Proceedings of SPIE 5429 (Signal processing, sensor fusion, and target recognition XIII).","DOI":"10.1117\/12.541208"},{"key":"308_CR4","doi-asserted-by":"crossref","unstructured":"Aslam, J. A., Pavlu, V., & Yilmaz, E. (2005). A geometric interpretation of R-precision and its correlation with average precision. In Proceedings of the international ACM SIGIR conference on research and development in information retrieval.","DOI":"10.1145\/1076034.1076134"},{"key":"308_CR5","unstructured":"Bell, W., Felzenszwalb, P., & Huttenlocher, D. (1999). Detection and long term tracking of moving objects in aerial video (Technical report). Cornell University."},{"key":"308_CR6","doi-asserted-by":"crossref","unstructured":"Belongie, S., Malik, J., & Puzicha, J. (2002). Shape matching and object recognition using shape contexts. IEEE Transactions on Pattern Analysis and Machine Intelligence, 24(4).","DOI":"10.1109\/34.993558"},{"key":"308_CR7","doi-asserted-by":"crossref","unstructured":"Blank, M., Gorelick, L., Shechtman, E., Irani, M., & Basri, R. (2005). Actions as space-time shapes. In Proc. ICCV.","DOI":"10.1109\/ICCV.2005.28"},{"key":"308_CR8","doi-asserted-by":"crossref","unstructured":"Bobick, A. F., & Davis, J. W. (2001). The recognition of human movement using temporal templates. IEEE Transactions on Pattern Analysis and Machine Intelligence, 23(3).","DOI":"10.1109\/34.910878"},{"key":"308_CR9","unstructured":"Boiman, O., & Irani, M. (2006). Similarity by composition. In NIPS."},{"key":"308_CR10","doi-asserted-by":"crossref","unstructured":"Buckley, C., & Voorhees, E. M. (2000). Evaluating evaluation measure stability. In Proceedings of international ACM SIGIR conference on research and development in information retrieval.","DOI":"10.1145\/345508.345543"},{"key":"308_CR11","unstructured":"Chang, C.-C., & Lin, C.-J. (2001). LIBSVM: a library for support vector machines. Software available at www.csie.ntu.edu.tw\/~cjlin\/libsvm ."},{"key":"308_CR12","unstructured":"Cheng, Y. (1995). Mean shift, mode seeking, and clustering. IEEE Transactions on Pattern Analysis and Machine Intelligence, 17(8)."},{"key":"308_CR13","doi-asserted-by":"crossref","unstructured":"Comaniciu, D., & Meer, P. (2002). Mean shift: A robust approach toward feature space analysis. IEEE Transactions on Pattern Analysis and Machine Intelligence, 24(5).","DOI":"10.1109\/34.1000236"},{"key":"308_CR14","doi-asserted-by":"crossref","unstructured":"Cour, T., & Shi, J. (2007). Recognizing objects by piecing together the segmentation puzzle. In Proc. CVPR.","DOI":"10.1109\/CVPR.2007.383051"},{"key":"308_CR15","doi-asserted-by":"crossref","unstructured":"Cyr, C. M., & Kimia, B. B. (2001). 3D object recognition using shape similiarity-based aspect graph. In Proc. ICCV.","DOI":"10.1109\/ICCV.2001.937526"},{"key":"308_CR16","unstructured":"DeMenthon, D. (2002). Spatio-temporal segmentation of video by hierarchical mean shift analysis. In Statistical methods in video processing workshop."},{"key":"308_CR17","doi-asserted-by":"crossref","unstructured":"DeMenthon, D., & Doermann, D. (2006). Video retrieval of near-duplicates using k-nearest neighbor retrieval of spatio-temporal descriptors. Multimedia Tools and Applications, 30(3).","DOI":"10.1007\/s11042-006-0029-z"},{"key":"308_CR18","doi-asserted-by":"crossref","unstructured":"Dollar, P., Rabaud, V., Cottrell, G., & Belongie, S. (2005). Behavior recognition via sparse spatio-temporal features. In IEEE VS-PETS workshop.","DOI":"10.1109\/VSPETS.2005.1570899"},{"key":"308_CR19","doi-asserted-by":"crossref","unstructured":"Efros, A., Berg, A., Mori, G., & Malik, J. (2003). Recognizing action at a distance. In Proc. ICCV.","DOI":"10.1109\/ICCV.2003.1238420"},{"key":"308_CR20","doi-asserted-by":"crossref","unstructured":"Fei-Fei, L., Fergus, R., & Perona, P. (2006). One-shot learning of object categories. IEEE Transactions on Pattern Analysis and Machine Intelligence, 28(4).","DOI":"10.1109\/TPAMI.2006.79"},{"key":"308_CR21","doi-asserted-by":"crossref","unstructured":"Felzenszwalb, P., & Huttenlocher, D. (2005). Pictorial structures for object recognition. International Journal of Computer Vision, 61(1).","DOI":"10.1023\/B:VISI.0000042934.15159.49"},{"key":"308_CR22","doi-asserted-by":"crossref","unstructured":"Fischler, M. A., & Elschlager, R. A. (1973). The representation and matching of pictorial structures. IEEE Transactions on Computers, 22(1).","DOI":"10.1109\/T-C.1973.223602"},{"key":"308_CR23","doi-asserted-by":"crossref","unstructured":"Funkhouser, T., Min, P., Kazhdan, M., Chen, J., Halderman, A., Dobkin, D., & Jacobs, D. (2003). A search engine for 3D models. ACM Transactions on Graphics.","DOI":"10.1145\/588272.588279"},{"key":"308_CR24","doi-asserted-by":"crossref","unstructured":"Gorelick, L., Galun, M., Sharon, E., Basri, R., & Brandt, A. (2006). Shape representation and classification using the Poisson equation. IEEE Transactions on Pattern Analysis and Machine Intelligence, 28(12).","DOI":"10.1109\/TPAMI.2006.253"},{"key":"308_CR25","unstructured":"Hamid, R., Maddi, S., Johnson, A., Bobick, A., Essa, I., & Isbell, C. (2005). Discovery and characterization of activities from event-streams. In Proc. UAI."},{"key":"308_CR26","doi-asserted-by":"crossref","unstructured":"Hongeng, S., & Nevatia, R. (2001). Multi-agent event recognition. In Proc. ICCV.","DOI":"10.1109\/ICCV.2001.937608"},{"key":"308_CR27","doi-asserted-by":"crossref","unstructured":"Jhuang, H., Serre, T., Wolf, L., & Poggio, T. (2007). A biologically inspired system for action recognition. In Proc. ICCV.","DOI":"10.1109\/ICCV.2007.4408988"},{"key":"308_CR28","doi-asserted-by":"crossref","unstructured":"Jiang, H., Drew, M. S., & Li, Z.-N. (2006). Successive convex matching for action detection. In Proc. CVPR.","DOI":"10.1109\/CVPR.2006.297"},{"key":"308_CR29","unstructured":"Kazhdan, M., Funkhouser, T., & Rusinkiewicz, S. (2003). Rotation invariant spherical harmonic representation of 3D shape descriptors. In Symposium on geometry processing."},{"key":"308_CR30","unstructured":"Ke, Y., Sukthankar, R., & Hebert, M. (2005). Efficient visual event detection using volumetric features. In Proc. ICCV."},{"key":"308_CR31","doi-asserted-by":"crossref","unstructured":"Ke, Y., Sukthankar, R., & Hebert, M. (2007a). Event detection in crowded videos. In Proc. ICCV.","DOI":"10.1109\/ICCV.2007.4409011"},{"key":"308_CR32","doi-asserted-by":"crossref","unstructured":"Ke, Y., Sukthankar, R., & Hebert, M. (2007b). Spatio-temporal shape and flow correlation for action recognition. In Workshop on visual surveillance.","DOI":"10.1109\/CVPR.2007.383512"},{"key":"308_CR33","doi-asserted-by":"crossref","unstructured":"Laptev, I., & Lindeberg, T. (2003). Space-time interest points. In Proc. ICCV.","DOI":"10.1109\/ICCV.2003.1238378"},{"key":"308_CR34","doi-asserted-by":"crossref","unstructured":"Laptev, I., & Perez, P. (2007). Retrieving actions in movies. In Proc. ICCV.","DOI":"10.1109\/ICCV.2007.4409105"},{"key":"308_CR35","doi-asserted-by":"crossref","unstructured":"Laptev, I., Marszalek, M., Schmid, C., & Rozenfeld, B. (2008). Learning realistic human actions from movies. In Proc. CVPR.","DOI":"10.1109\/CVPR.2008.4587756"},{"key":"308_CR36","doi-asserted-by":"crossref","unstructured":"Leibe, B., Schindler, K., & Gool, L. V. (2007). Coupled detection and trajectory estimation for multi-object tracking. In Proc. ICCV.","DOI":"10.1109\/ICCV.2007.4408936"},{"key":"308_CR37","doi-asserted-by":"crossref","unstructured":"Leung, Y., Zhang, J.-S., & Xu, Z.-B. (2000). Clustering by scale-space filtering. IEEE Transactions on Pattern Analysis and Machine Intelligence, 22.","DOI":"10.1109\/34.895974"},{"key":"308_CR38","doi-asserted-by":"crossref","unstructured":"Ling, H., & Jacobs, D. W. (2007). Shape classification using the inner-distance. IEEE Transactions on Pattern Analysis and Machine Intelligence, 29(2).","DOI":"10.1109\/TPAMI.2007.41"},{"key":"308_CR39","doi-asserted-by":"crossref","unstructured":"Lowe, D. G. (2004). Distinctive image features from scale-invariant keypoints. International Journal of Computer Vision, 60(2).","DOI":"10.1023\/B:VISI.0000029664.99615.94"},{"key":"308_CR40","unstructured":"Lucas, B., & Kanade, T. (1981). An iterative image registration technique with an application to stereo vision. In Proceedings of the 7th international joint conference on artificial intelligence."},{"key":"308_CR41","doi-asserted-by":"crossref","unstructured":"Medioni, G., Cohen, I., Bremond, F., Hongeng, S., & Nevatia, R. (2001). Event detection and analysis from video streams. IEEE Transactions on Pattern Analysis and Machine Intelligence.","DOI":"10.1109\/34.946990"},{"key":"308_CR42","doi-asserted-by":"crossref","unstructured":"Mori, G. (2005). Guiding model search using segmentation. In Proc. ICCV.","DOI":"10.1109\/ICCV.2005.112"},{"key":"308_CR43","doi-asserted-by":"crossref","unstructured":"Niebles, J. C., Wang, H., & Fei-Fei, L. (2006). Unsupervised learning of human action categories using spatial-temporal words. In Proc. BMVC.","DOI":"10.5244\/C.20.127"},{"key":"308_CR44","doi-asserted-by":"crossref","unstructured":"Odobez, J.-M., & Bouthemy, P. (1995). Robust multiresolution estimation of parametric motion models. Journal of Visual Communication and Image Representation, 6(4).","DOI":"10.1006\/jvci.1995.1029"},{"key":"308_CR45","unstructured":"QuickFix Tight Abs Workout. Peter Pan Studios. ASIN: B00004Z73V."},{"key":"308_CR46","unstructured":"Ramanan, D., & Forsyth, D. A. (2003). Automatic annotation of everyday movements. In NIPS."},{"key":"308_CR47","doi-asserted-by":"crossref","unstructured":"Ramanan, D., Forsyth, D. A., & Barnard, K. (2006). Building models of animals from video. IEEE Transactions on Pattern Analysis and Machine Intelligence, 28(8).","DOI":"10.1109\/TPAMI.2006.155"},{"key":"308_CR48","doi-asserted-by":"crossref","unstructured":"Ramanan, D., Forsyth, D. A., & Zisserman, A. (2007). Tracking people by learning their appearance. IEEE Transactions on Pattern Analysis and Machine Intelligence, 29(1).","DOI":"10.1109\/TPAMI.2007.250600"},{"key":"308_CR49","doi-asserted-by":"crossref","unstructured":"Sali, E., & Ullman, S. (1999). Combining class-specific fragments for object classification. In Proc. BMVC.","DOI":"10.5244\/C.13.21"},{"key":"308_CR50","doi-asserted-by":"crossref","unstructured":"Schuldt, C., Laptev, I., & Caputo, B. (2004). Recognizing human actions: A local SVM approach. In Proc. ICPR.","DOI":"10.1109\/ICPR.2004.1334462"},{"key":"308_CR51","doi-asserted-by":"crossref","unstructured":"Shechtman, E., & Irani, M. (2005). Space-time behavior based correlation. In Proc. CVPR.","DOI":"10.1109\/CVPR.2005.328"},{"key":"308_CR52","doi-asserted-by":"crossref","unstructured":"Shechtman, E., & Irani, M. (2007a). Matching local self-similarities across images and video. In Proc. CVPR.","DOI":"10.1109\/CVPR.2007.383198"},{"key":"308_CR53","doi-asserted-by":"crossref","unstructured":"Shechtman, E., & Irani, M. (2007b). Space-time behavior based correlation -OR- How to tell if two underlying motion fields are similar without computing them? IEEE Transactions on Pattern Analysis and Machine Intelligence, 29(11).","DOI":"10.1109\/TPAMI.2007.1119"},{"key":"308_CR54","doi-asserted-by":"crossref","unstructured":"Sheikh, Y., Sheikh, M., & Shah, M. (2005). Exploring the space of a human action. In Proc. ICCV.","DOI":"10.1109\/ICCV.2005.90"},{"key":"308_CR55","unstructured":"Shi, J., & Malik, J. (2000). Normalized cuts and image segmentation. IEEE Transactions on Pattern Analysis and Machine Intelligence, 22(8)."},{"key":"308_CR56","doi-asserted-by":"crossref","unstructured":"Sivic, J., & Zisserman, A. (2003). Video Google: A text retrieval approach to object matching in videos. In Proc. ICCV.","DOI":"10.1109\/ICCV.2003.1238663"},{"key":"308_CR57","doi-asserted-by":"crossref","unstructured":"Srinivasan, P., & Shi, J. (2007). Bottom-up recognition and parsing of the human body. In Proc. CVPR.","DOI":"10.1109\/CVPR.2007.383301"},{"key":"308_CR58","doi-asserted-by":"crossref","unstructured":"Veit, P. B. T., & Cao, F. (2004). Probabilistic parameter-free motion detection. In Proc. CVPR.","DOI":"10.1109\/CVPR.2004.1315102"},{"key":"308_CR59","doi-asserted-by":"crossref","unstructured":"Vaswani, N., Chowdhury, A. R., & Chellappa, R. (2003). Activity recognition using the dynamics of the configuration of interacting objects. In Proc. CVPR.","DOI":"10.1109\/CVPR.2003.1211526"},{"key":"308_CR60","doi-asserted-by":"crossref","unstructured":"Veeraraghavan, A., Chellappa, R., & Roy-Chowdhury, A. K. (2006). The function space of an activity. In Proc. CVPR.","DOI":"10.1109\/CVPR.2006.304"},{"key":"308_CR61","doi-asserted-by":"crossref","unstructured":"Viola, P., & Jones, M. (2004). Robust real-time face detection. International Journal of Computer Vision, 57(2).","DOI":"10.1023\/B:VISI.0000013087.49260.fb"},{"key":"308_CR62","doi-asserted-by":"crossref","unstructured":"Wang, J., Bhat, P., Colburn, A., Agrawala, M., & Cohen, M. (2005). Interactive video cutout. In ACM SIGGRAPH.","DOI":"10.1145\/1186822.1073233"},{"key":"308_CR63","doi-asserted-by":"crossref","unstructured":"Wang, J., Thiesson, B., Xu, Y., & Cohen, M. (2004). Image and video segmentation by anisotropic kernel mean shift. In Proc. ECCV.","DOI":"10.1007\/978-3-540-24671-8_19"},{"key":"308_CR64","doi-asserted-by":"crossref","unstructured":"Wang, L., Hu, W., & Tan, T. (2003). Recent developments in human motion analysis. Pattern Recognition, 36(3).","DOI":"10.1016\/S0031-3203(02)00100-0"},{"key":"308_CR65","doi-asserted-by":"crossref","unstructured":"Weber, M., Welling, M., & Perona, P. (2000). Unsupervised learning of models for recognition. In Proc. ECCV.","DOI":"10.1007\/3-540-45054-8_2"},{"key":"308_CR66","unstructured":"Weinland, D., Ronfard, R., & Boyer, E. (2006a). Automatic discovery of action taxonomies from multiple views. In Proc. CVPR."},{"key":"308_CR67","doi-asserted-by":"crossref","unstructured":"Weinland, D., Ronfard, R., & Boyer, E. (2006b). Free viewpoint action recognition using motion history volumes. Computer Vision and Image Understanding, 104(2).","DOI":"10.1016\/j.cviu.2006.07.013"},{"key":"308_CR68","unstructured":"Wimbledon 2000 Semi-Final\u2014Agassi vs. Rafter. SRO Sports Entertainment. ISBN: 0-7697-7886-0."},{"key":"308_CR69","unstructured":"Yilmaz, A., & Shah, M. (2005). Actions as objects: A novel action representation. In Proc. CVPR."},{"key":"308_CR70","unstructured":"YouTube (2008). http:\/\/www.youtube.com\/ ."},{"key":"308_CR71","doi-asserted-by":"crossref","unstructured":"Zhu, G., Xu, C., Gao, W., & Huang, Q. (2006a). Action recognition in broadcast tennis video using optical flow and support vector machine. In ECCV workshop in HCI.","DOI":"10.1007\/11754336_9"},{"key":"308_CR72","doi-asserted-by":"crossref","unstructured":"Zhu, G., Xu, C., Huang, Q., & Gao, W. (2006b). Action recognition in broadcast tennis video. In Proc. ICPR.","DOI":"10.1007\/11754336_9"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-009-0308-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11263-009-0308-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-009-0308-z","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,5,28]],"date-time":"2023-05-28T22:57:56Z","timestamp":1685314676000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11263-009-0308-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2009,12,23]]},"references-count":72,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2010,7]]}},"alternative-id":["308"],"URL":"https:\/\/doi.org\/10.1007\/s11263-009-0308-z","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2009,12,23]]}}}