{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,14]],"date-time":"2026-02-14T05:25:17Z","timestamp":1771046717204,"version":"3.50.1"},"reference-count":33,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2015,8,2]],"date-time":"2015-08-02T00:00:00Z","timestamp":1438473600000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2016,9]]},"DOI":"10.1007\/s11263-015-0847-4","type":"journal-article","created":{"date-parts":[[2015,8,1]],"date-time":"2015-08-01T03:24:55Z","timestamp":1438399495000},"page":"307-328","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":33,"title":["First-Person Activity Recognition: Feature, Temporal Structure, and Prediction"],"prefix":"10.1007","volume":"119","author":[{"given":"M. S.","family":"Ryoo","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Larry","family":"Matthies","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2015,8,2]]},"reference":[{"key":"847_CR1","first-page":"16:1","volume":"43","author":"J\u00a0K Aggarwal","year":"2011","unstructured":"Aggarwal, J\u00a0. K., & Ryoo, M\u00a0. S. (2011). Human activity analysis: A review. ACM Computing Surveys, 43, 16:1\u201316:43.","journal-title":"ACM Computing Surveys"},{"key":"847_CR2","doi-asserted-by":"crossref","unstructured":"Choi, J., Jeon, W., & Lee, S. (2008). Spatio-temporal pyramid matching for sports videos. In ACM MIR.","DOI":"10.1145\/1460096.1460144"},{"key":"847_CR3","doi-asserted-by":"crossref","unstructured":"Dollar, P., Rabaud, V., Cottrell, G., & Belongie, S. (2005). Behavior recognition via sparse spatio-temporal features. In IEEE Workshop on VS-PETS.","DOI":"10.1109\/VSPETS.2005.1570899"},{"key":"847_CR4","doi-asserted-by":"crossref","unstructured":"Fathi, A., Farhadi, A., & Rehg, J.\u00a0M. (2011). Understanding egocentric activities. In ICCV.","DOI":"10.1109\/ICCV.2011.6126269"},{"key":"847_CR5","doi-asserted-by":"crossref","unstructured":"Fathi, A., Hodgins, J., & Rehg, J. (2012). Social interactions: A first-person perspective. In CVPR.","DOI":"10.1109\/CVPR.2012.6247805"},{"key":"847_CR6","doi-asserted-by":"crossref","unstructured":"Hoai, M., & la Torre, F.\u00a0D. (2012). Max-margin early event detectors. In CVPR.","DOI":"10.1109\/CVPR.2012.6248012"},{"key":"847_CR7","doi-asserted-by":"crossref","unstructured":"Iwashita, Y., Takamine, A., Kurazume, R., & Ryoo, M.\u00a0S. (2014). First-person animal activity recognition from egocentric videos. In ICPR.","DOI":"10.1109\/ICPR.2014.739"},{"key":"847_CR8","doi-asserted-by":"crossref","unstructured":"Kitani, K.\u00a0M., Okabe, T., Sato, Y., & Sugimoto, A. (2011). Fast unsupervised ego-action learning for first-person sports videos. In CVPR.","DOI":"10.1109\/CVPR.2011.5995406"},{"key":"847_CR9","doi-asserted-by":"crossref","unstructured":"Kitani, K.\u00a0M., Ziebart, B.\u00a0D., Bagnell, J.\u00a0A., & Hebert, M. (2012). Activity forecasting. In ECCV.","DOI":"10.1007\/978-3-642-33765-9_15"},{"key":"847_CR10","doi-asserted-by":"crossref","unstructured":"Koppula, H.\u00a0S., & Saxena, A. (2013). Anticipating human activities using object affordances for reactive robotic response. In RSS.","DOI":"10.15607\/RSS.2013.IX.006"},{"key":"847_CR11","unstructured":"Lan, T., Sigal, L., & Mori, G. (2012). Social roles in hierarchical models for human activity recognition. In CVPR."},{"issue":"2","key":"847_CR12","doi-asserted-by":"crossref","first-page":"107","DOI":"10.1007\/s11263-005-1838-7","volume":"64","author":"I Laptev","year":"2005","unstructured":"Laptev, I. (2005). On space-time interest points. International Journal of Computer Vision, 64(2), 107\u2013123.","journal-title":"International Journal of Computer Vision"},{"key":"847_CR13","doi-asserted-by":"crossref","unstructured":"Laptev, I., Marszalek, M., Schmid, C., & Rozenfeld, B. (2008). Learning realistic human actions from movies. In CVPR.","DOI":"10.1109\/CVPR.2008.4587756"},{"key":"847_CR14","doi-asserted-by":"crossref","unstructured":"Lazebnik, S., Schmid, C., & Ponce, J. (2006). Beyond bags of features: Spatial pyramid matching for recognizing natural scene categories. In CVPR.","DOI":"10.1109\/CVPR.2006.68"},{"key":"847_CR15","doi-asserted-by":"crossref","unstructured":"Lee, S., Bambach, S., Crandall, D.J., Franchak, J.M., & Yu, C. (2014). This hand is my hand: A probabilistic approach to hand disambiguation in egocentric video. In CVPRW.","DOI":"10.1109\/CVPRW.2014.86"},{"key":"847_CR16","unstructured":"Lee, Y., Ghosh, J., & Grauman, K. (2012). Discovering important people and objects for egocentric video summarization. In CVPR."},{"key":"847_CR17","doi-asserted-by":"crossref","unstructured":"Li, Y., Fathi, A., & Rehg, J.\u00a0M. (2013). Learning to predict gaze in egocentric video. In ICCV.","DOI":"10.1109\/ICCV.2013.399"},{"key":"847_CR18","doi-asserted-by":"crossref","unstructured":"Niebles, J., Chen, C., & Fei-Fei, L. (2010). Modeling temporal structure of decomposable motion segments for activity classification. In ECCV.","DOI":"10.1007\/978-3-642-15552-9_29"},{"key":"847_CR19","doi-asserted-by":"crossref","unstructured":"Pirsiavash, H., & Ramanan, D. (2012). Detecting activities of daily living in first-person camera views. In CVPR.","DOI":"10.1109\/CVPR.2012.6248010"},{"key":"847_CR20","doi-asserted-by":"crossref","unstructured":"Ryoo, M.\u00a0S. (2011). Human activity prediction: Early recognition of ongoing activities from streaming videos. In ICCV.","DOI":"10.1109\/ICCV.2011.6126349"},{"key":"847_CR21","doi-asserted-by":"crossref","unstructured":"Ryoo, M.\u00a0S., & Aggarwal, J. (2009). Spatio-temporal relationship match: Video structure comparison for recognition of complex human activities. In ICCV.","DOI":"10.1109\/ICCV.2009.5459361"},{"issue":"2","key":"847_CR22","doi-asserted-by":"crossref","first-page":"183","DOI":"10.1007\/s11263-010-0355-5","volume":"93","author":"MS Ryoo","year":"2011","unstructured":"Ryoo, M. S., & Aggarwal, J. K. (2011). Stochastic representation and recognition of high-level group activities. International Journal of Computer Vision, 93(2), 183\u2013200.","journal-title":"International Journal of Computer Vision"},{"key":"847_CR23","doi-asserted-by":"crossref","unstructured":"Ryoo, M.\u00a0S., & Matthies, L. (2013). First-person activity recognition: What are they doing to me? In CVPR.","DOI":"10.1109\/CVPR.2013.352"},{"key":"847_CR24","doi-asserted-by":"crossref","unstructured":"Sadanand, S., & Corso, J. (2012). Action bank: A high-level representation of activity in video. In CVPR.","DOI":"10.1109\/CVPR.2012.6247806"},{"key":"847_CR25","doi-asserted-by":"crossref","unstructured":"Schuldt, C., Laptev, I., & Caputo, B. (2004). Recognizing human actions: A local SVM approach. In ICPR.","DOI":"10.1109\/ICPR.2004.1334462"},{"key":"847_CR26","unstructured":"Shawe-Taylor, N., & Kandola, A. (2002). On kernel target alignment. In NIPS."},{"key":"847_CR27","doi-asserted-by":"crossref","unstructured":"Shotton, J., Fitzgibbon, A., Cook, M., Sharp, T., Finocchio, M., Moore, R., Kipman, A., & Blake, A. (2011). Real-time human pose recognition in parts from a single depth image. In CVPR.","DOI":"10.1109\/CVPR.2011.5995316"},{"key":"847_CR28","doi-asserted-by":"crossref","unstructured":"Si, Z., Pei, M., Yao, B., & Zhu, S. (2011). Unsupervised learning of event and-or grammar and semantics from video. In ICCV.","DOI":"10.1109\/ICCV.2011.6126223"},{"key":"847_CR29","doi-asserted-by":"crossref","unstructured":"Spriggs, E.\u00a0H., Torre, F.\u00a0D.\u00a0L., & Hebert, M. (2009). Temporal segmentation and activity classification from first-person sensing. In IEEE Workshop on Egocentric Vision, in conjunction with CVPR.","DOI":"10.1109\/CVPRW.2009.5204354"},{"key":"847_CR30","first-page":"975","volume":"5","author":"T Wu","year":"2004","unstructured":"Wu, T., Lin, C., & Weng, R. (2004). Probability estimates for multi-class classification by pairwise coupling. JMLR, 5, 975\u20131005.","journal-title":"JMLR"},{"key":"847_CR31","doi-asserted-by":"crossref","unstructured":"Xia, L., Chen, C.-C., & Aggarwal, J.\u00a0K. (2012). View invariant human action recognition using histograms of 3D joints. In CVPRW.","DOI":"10.1109\/CVPRW.2012.6239233"},{"key":"847_CR32","unstructured":"Xie, D., Todorovic, S., & Zhu, S.-C. (2013). Inferring \u201cdark matter\u201d and \u201cdark energy\u201d from videos. In ICCV."},{"key":"847_CR33","doi-asserted-by":"crossref","first-page":"213","DOI":"10.1007\/s11263-006-9794-4","volume":"73","author":"J Zhang","year":"2007","unstructured":"Zhang, J., Marszalek, M., Lazebnik, S., & Schmid, C. (2007). Local features and kernels for classification of texture and object categories: A comprehensive study. International Journal of Computer Vision, 73, 213\u2013238.","journal-title":"International Journal of Computer Vision"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-015-0847-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11263-015-0847-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-015-0847-4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,8,28]],"date-time":"2019-08-28T20:10:39Z","timestamp":1567023039000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11263-015-0847-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,8,2]]},"references-count":33,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2016,9]]}},"alternative-id":["847"],"URL":"https:\/\/doi.org\/10.1007\/s11263-015-0847-4","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2015,8,2]]}}}