{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,19]],"date-time":"2026-02-19T02:12:03Z","timestamp":1771467123359,"version":"3.50.1"},"publisher-location":"Berlin, Heidelberg","reference-count":27,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783642337116","type":"print"},{"value":"9783642337123","type":"electronic"}],"license":[{"start":{"date-parts":[[2012,1,1]],"date-time":"2012-01-01T00:00:00Z","timestamp":1325376000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-33712-3_51","type":"book-chapter","created":{"date-parts":[[2012,9,25]],"date-time":"2012-09-25T23:00:38Z","timestamp":1348614038000},"page":"707-721","source":"Crossref","is-referenced-by-count":64,"title":["Spatio-Temporal Phrases for Activity Recognition"],"prefix":"10.1007","author":[{"given":"Yimeng","family":"Zhang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaoming","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ming-Ching","family":"Chang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Weina","family":"Ge","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tsuhan","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"51_CR1","unstructured":"Dollar, P., Rabaud, V., Cottrell, G., Belongie, S.J.: Behavior recognition via sparse spatio-temporal features. In: PETS Workshop (2005)"},{"key":"51_CR2","doi-asserted-by":"crossref","unstructured":"Laptev, I., Marszalek, M., Schmid, C., Rozenfeld, B.: Learning realistic human actions from movies. In: CVPR (2008)","DOI":"10.1109\/CVPR.2008.4587756"},{"key":"51_CR3","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"650","DOI":"10.1007\/978-3-540-88688-4_48","volume-title":"Computer Vision \u2013 ECCV 2008","author":"G. Willems","year":"2008","unstructured":"Willems, G., Tuytelaars, T., Van Gool, L.: An Efficient Dense and Scale-Invariant Spatio-Temporal Interest Point Detector. In: Forsyth, D., Torr, P., Zisserman, A. (eds.) ECCV 2008, Part II. LNCS, vol.\u00a05303, pp. 650\u2013663. Springer, Heidelberg (2008)"},{"key":"51_CR4","doi-asserted-by":"publisher","first-page":"539","DOI":"10.1109\/TPAMI.2008.87","volume":"31","author":"X.G. Wang","year":"2009","unstructured":"Wang, X.G., Ma, X.X., Grimson, W.E.L.: Unsupervised activity perception in crowded and complicated scenes using hierarchical Bayesian models. PAMI\u00a031, 539\u2013555 (2009)","journal-title":"PAMI"},{"key":"51_CR5","doi-asserted-by":"crossref","unstructured":"Liu, J.G., Luo, J.B., Shah, M.: Recognizing realistic actions from videos \u201cin the wild\u201d. In: CVPR (2009)","DOI":"10.1109\/CVPR.2009.5206744"},{"key":"51_CR6","unstructured":"Liu, J.G., Shah, M.: Learning human actions via information maximization. In: CVPR (2008)"},{"key":"51_CR7","doi-asserted-by":"crossref","unstructured":"Wong, S.F., Kim, T.K., Cipolla, R.: Learning motion categories using both semantic and structural information. In: CVPR (2007)","DOI":"10.1109\/CVPR.2007.383332"},{"key":"51_CR8","doi-asserted-by":"crossref","unstructured":"Gaur, U., Zhu, Y., Song, B., Roy-Chowdhury, A.: A \u201cstring of feature graphs\u201d model for recognition of complex activities in natural videos. In: ICCV (2011)","DOI":"10.1109\/ICCV.2011.6126548"},{"key":"51_CR9","unstructured":"Wang, P., Abowd, G.D., Rehg, J.M.: Quasi-periodic event analysis for social game retrieval. In: ICCV (2009)"},{"key":"51_CR10","doi-asserted-by":"crossref","unstructured":"Duan, L., Xu, D., Tsang, I.W.H., Luo, J.: Visual event recognition in videos by learning from web data. In: CVPR (2010)","DOI":"10.1109\/CVPR.2010.5539870"},{"key":"51_CR11","doi-asserted-by":"crossref","unstructured":"Nowozin, S., Bakir, G., Tsuda, K.: Discriminative subsequence mining for action classification. In: ICCV (2007)","DOI":"10.1109\/ICCV.2007.4409049"},{"key":"51_CR12","unstructured":"Sun, J., Wu, X., Yan, S.C., Cheong, L.F., Chua, T.S., Li, J.T.: Hierarchical spatio-temporal context modeling for action recognition. In: CVPR (2009)"},{"key":"51_CR13","doi-asserted-by":"crossref","unstructured":"Savarese, S., Pozo, A.D., Niebles, J.C., Li, F.F.: Spatial-temporal correlatons for unsupervised action classification. In: WMVC (2008)","DOI":"10.1109\/WMVC.2008.4544068"},{"key":"51_CR14","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"222","DOI":"10.1007\/978-3-540-88682-2_18","volume-title":"Computer Vision \u2013 ECCV 2008","author":"A. Gilbert","year":"2008","unstructured":"Gilbert, A., Illingworth, J., Bowden, R.: Scale Invariant Action Recognition Using Compound Features Mined from Dense Spatio-temporal Corners. In: Forsyth, D., Torr, P., Zisserman, A. (eds.) ECCV 2008, Part I. LNCS, vol.\u00a05302, pp. 222\u2013233. Springer, Heidelberg (2008)"},{"key":"51_CR15","doi-asserted-by":"crossref","unstructured":"Kovashka, A., Grauman, K.: Learning a hierarchy of discriminative space-time neighborhood features for human action recognition. In: CVPR (2010)","DOI":"10.1109\/CVPR.2010.5539881"},{"key":"51_CR16","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Jia, Z., Chen, T.: Image retrieval with geometry-preserving visual phrases. In: CVPR (2011)","DOI":"10.1109\/CVPR.2011.5995528"},{"key":"51_CR17","doi-asserted-by":"crossref","unstructured":"Wu, S., Moore, B.E., Shah, M.: Chaotic invariants of lagrangian particle trajectories for anomaly detection in crowded scenes. In: CVPR (2010)","DOI":"10.1109\/CVPR.2010.5539882"},{"key":"51_CR18","doi-asserted-by":"crossref","unstructured":"Messing, R., Pal, C., Kautz, H.A.: Activity recognition using the velocity histories of tracked keypoints. In: ICCV (2009)","DOI":"10.1109\/ICCV.2009.5459154"},{"key":"51_CR19","doi-asserted-by":"crossref","unstructured":"Yao, A., Gall, J., Van Gool, L.: A Hough transform-based voting framework for action recognition. In: CVPR (2010)","DOI":"10.1109\/CVPR.2010.5539883"},{"key":"51_CR20","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Chen, T.: Efficient kernels for identifying unbounded-order spatial features. In: CVPR (2009)","DOI":"10.1109\/CVPR.2009.5206791"},{"key":"51_CR21","doi-asserted-by":"crossref","unstructured":"Schuldt, C., Laptev, I., Caputo, B.: Recognizing human actions: a local SVM approach. In: ICPR (2004)","DOI":"10.1109\/ICPR.2004.1334462"},{"key":"51_CR22","doi-asserted-by":"crossref","unstructured":"Chang, M.C., Krahnstoever, N., Ge, W.: Probabilistic group-level motion analysis and scenario recognition. In: ICCV (2011)","DOI":"10.1109\/ICCV.2011.6126312"},{"key":"51_CR23","doi-asserted-by":"crossref","unstructured":"Brendel, W., Todorovic, S.: Learning spatiotemporal graphs of human activities. In: ICCV (2011)","DOI":"10.1109\/ICCV.2011.6126316"},{"key":"51_CR24","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"270","DOI":"10.1007\/978-3-642-17711-8_28","volume-title":"Recognizing Patterns in Signals, Speech, Images and Videos","author":"M.S. Ryoo","year":"2010","unstructured":"Ryoo, M.S., Chen, C.-C., Aggarwal, J.K., Roy-Chowdhury, A.: An Overview of Contest on Semantic Description of Human Activities (SDHA) 2010. In: \u00dcnay, D., \u00c7ataltepe, Z., Aksoy, S. (eds.) ICPR 2010. LNCS, vol.\u00a06388, pp. 270\u2013285. Springer, Heidelberg (2010)"},{"key":"51_CR25","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"306","DOI":"10.1007\/978-3-642-17711-8_31","volume-title":"Recognizing Patterns in Signals, Speech, Images and Videos","author":"D. Waltisberg","year":"2010","unstructured":"Waltisberg, D., Yao, A., Gall, J., Van Gool, L.: Variations of a Hough-Voting Action Recognition System. In: \u00dcnay, D., \u00c7ataltepe, Z., Aksoy, S. (eds.) ICPR 2010. LNCS, vol.\u00a06388, pp. 306\u2013312. Springer, Heidelberg (2010)"},{"key":"51_CR26","doi-asserted-by":"crossref","unstructured":"Ryoo, M.S., Aggarwal, J.K.: Spatio-temporal relationship match: Video structure comparison for recognition of complex human activities. In: ICCV (2009)","DOI":"10.1109\/ICCV.2009.5459361"},{"key":"51_CR27","unstructured":"Zhang, Y., Ge, W., Chang, M.C., Liu, X.: Group context learning for event recognition. In: WACV (2012)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2012"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-33712-3_51","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,12,2]],"date-time":"2019-12-02T09:13:13Z","timestamp":1575277993000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-33712-3_51"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642337116","9783642337123"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-33712-3_51","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012]]}}}