{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T16:44:14Z","timestamp":1777567454978,"version":"3.51.4"},"publisher-location":"Cham","reference-count":36,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783319105925","type":"print"},{"value":"9783319105932","type":"electronic"}],"license":[{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014]]},"DOI":"10.1007\/978-3-319-10593-2_28","type":"book-chapter","created":{"date-parts":[[2014,8,14]],"date-time":"2014-08-14T02:52:23Z","timestamp":1407984743000},"page":"417-433","source":"Crossref","is-referenced-by-count":40,"title":["Discovering Groups of People in Images"],"prefix":"10.1007","author":[{"given":"Wongun","family":"Choi","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yu-Wei","family":"Chao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Caroline","family":"Pantofaru","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Silvio","family":"Savarese","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","reference":[{"key":"28_CR1","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1007\/978-3-642-33765-9_14","volume-title":"Computer Vision \u2013 ECCV 2012","author":"M.R. Amer","year":"2012","unstructured":"Amer, M.R., Xie, D., Zhao, M., Todorovic, S., Zhu, S.-C.: Cost-sensitive top-down\/bottom-up inference for multiscale activity recognition. In: Fitzgibbon, A., Lazebnik, S., Perona, P., Sato, Y., Schmid, C. (eds.) ECCV 2012, Part IV. LNCS, vol.\u00a07575, pp. 187\u2013200. Springer, Heidelberg (2012)"},{"key":"28_CR2","doi-asserted-by":"crossref","unstructured":"Bourdev, L., Malik, J.: Poselets: Body part detectors trained using 3d human pose annotations. In: International Conference on Computer Vision, ICCV (2009), \n                    \n                      http:\/\/www.eecs.berkeley.edu\/~lbourdev\/poselets","DOI":"10.1109\/ICCV.2009.5459303"},{"key":"28_CR3","unstructured":"Chang, C.C., Lin, C.J.: LIBSVM: A library for support vector machines (2001), \n                    \n                      http:\/\/www.csie.ntu.edu.tw\/~cjlin\/libsvm"},{"key":"28_CR4","unstructured":"Chen, C.Y., Grauman, K.: Efficient activity detection with max-subgraph search. In: CVPR (2012)"},{"key":"28_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"215","DOI":"10.1007\/978-3-642-33765-9_16","volume-title":"Computer Vision \u2013 ECCV 2012","author":"W. Choi","year":"2012","unstructured":"Choi, W., Savarese, S.: A unified framework for multi-target tracking and collective activity recognition. In: Fitzgibbon, A., Lazebnik, S., Perona, P., Sato, Y., Schmid, C. (eds.) ECCV 2012, Part IV. LNCS, vol.\u00a07575, pp. 215\u2013230. Springer, Heidelberg (2012)"},{"key":"28_CR6","unstructured":"Choi, W., Shahid, K., Savarese, S.: What are they doing?: Collective activity classification using spatio-temporal relationship among people. In: VSWS (2009)"},{"key":"28_CR7","unstructured":"Dalal, N., Triggs, B.: Histograms of oriented gradients for human detection. In: CVPR (2005)"},{"key":"28_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"158","DOI":"10.1007\/978-3-642-33765-9_12","volume-title":"Computer Vision \u2013 ECCV 2012","author":"C. Desai","year":"2012","unstructured":"Desai, C., Ramanan, D.: Detecting actions, poses, and objects with relational phraselets. In: Fitzgibbon, A., Lazebnik, S., Perona, P., Sato, Y., Schmid, C. (eds.) ECCV 2012, Part IV. LNCS, vol.\u00a07575, pp. 158\u2013172. Springer, Heidelberg (2012)"},{"key":"28_CR9","unstructured":"Dollar, P., Rabaud, V., Cottrell, G., Belongie, S.: Behavior recognition via sparse spatio-temporal features. In: VS-PETS (2005)"},{"key":"28_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"228","DOI":"10.1007\/978-3-642-15549-9_17","volume-title":"Computer Vision \u2013 ECCV 2010","author":"M. Eichner","year":"2010","unstructured":"Eichner, M., Ferrari, V.: We are family: Joint pose estimation of multiple persons. In: Daniilidis, K., Maragos, P., Paragios, N. (eds.) ECCV 2010, Part I. LNCS, vol.\u00a06311, pp. 228\u2013242. Springer, Heidelberg (2010)"},{"key":"28_CR11","unstructured":"Everingham, M., Van Gool, L., Williams, C.K.I., Winn, J., Zisserman, A.: The PASCAL Visual Object Classes Challenge (VOC 2012) Results, \n                    \n                      http:\/\/www.pascal-network.org\/challenges\/VOC\/voc2012\/workshop\/index.html"},{"key":"28_CR12","doi-asserted-by":"crossref","unstructured":"Hoai, M., De la Torre, F.: Max-margin early event detectors. In: CVPR (2012)","DOI":"10.1109\/CVPR.2012.6248012"},{"key":"28_CR13","doi-asserted-by":"crossref","unstructured":"Hoai, M., Lan, Z.Z., De la Torre, F.: Joint segmentation and classification of human actions in video. In: CVPR (2011)","DOI":"10.1109\/CVPR.2011.5995470"},{"key":"28_CR14","doi-asserted-by":"crossref","unstructured":"Hoiem, D., Efros, A.A., Hebert, M.: Putting objects in perspective. IJCV (2008)","DOI":"10.1007\/s11263-008-0137-5"},{"key":"28_CR15","doi-asserted-by":"crossref","unstructured":"Joachims, T., Finley, T., Yu, C.N.: Cutting-plane training of structural svms. Machine Learning (2009)","DOI":"10.1007\/s10994-009-5108-8"},{"key":"28_CR16","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"116","DOI":"10.1007\/978-3-642-33718-5_9","volume-title":"Computer Vision \u2013 ECCV 2012","author":"S. Khamis","year":"2012","unstructured":"Khamis, S., Morariu, V.I., Davis, L.S.: Combining per-frame and per-track cues for multi-person action recognition. In: Fitzgibbon, A., Lazebnik, S., Perona, P., Sato, Y., Schmid, C. (eds.) ECCV 2012, Part I. LNCS, vol.\u00a07572, pp. 116\u2013129. Springer, Heidelberg (2012)"},{"key":"28_CR17","unstructured":"Koller, D., Friedman, N.: Probabilistic graphical models: principles and techniques. MIT press (2009)"},{"key":"28_CR18","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"239","DOI":"10.1007\/978-3-642-15555-0_18","volume-title":"Computer Vision \u2013 ECCV 2010","author":"L. Ladicky","year":"2010","unstructured":"Ladicky, L., Russell, C., Kohli, P., Torr, P.H.S.: Graph cut based inference with co-occurrence statistics. In: Daniilidis, K., Maragos, P., Paragios, N. (eds.) ECCV 2010, Part V. LNCS, vol.\u00a06315, pp. 239\u2013253. Springer, Heidelberg (2010)"},{"key":"28_CR19","unstructured":"Lan, T., Wang, Y., Yang, W., Mori, G.: Beyond actions: Discriminative models for contextual group activities. In: NIPS (2010)"},{"key":"28_CR20","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1007\/978-3-642-35749-7_14","volume-title":"Trends and Topics in Computer Vision","author":"T. Lan","year":"2012","unstructured":"Lan, T., Wang, Y., Mori, G., Robinovitch, S.N.: Retrieving actions in group contexts. In: Kutulakos, K.N. (ed.) ECCV 2010 Workshops, Part I. LNCS, vol.\u00a06553, pp. 181\u2013194. Springer, Heidelberg (2012)"},{"key":"28_CR21","doi-asserted-by":"crossref","unstructured":"Laptev, I., Lindeberg, T.: Space-time interest points. In: ICCV (2003)","DOI":"10.1109\/ICCV.2003.1238378"},{"key":"28_CR22","doi-asserted-by":"crossref","unstructured":"Leal-Taixe, L., Fenzi, M., Kuznetsova, A., Rosenhahn, B., Savarese, S.: Learning an image-based motion context for multiple people tracking. In: IEEE Conference on Computer Vision and Pattern Recognition, CVPR (2014)","DOI":"10.1109\/CVPR.2014.453"},{"key":"28_CR23","doi-asserted-by":"crossref","unstructured":"Liu, J., Luo, J., Shah, M.: Recongizing realistic actions from videos \u201cin the wild\u201d. In: CVPR (2009)","DOI":"10.1109\/CVPR.2009.5206744"},{"key":"28_CR24","doi-asserted-by":"crossref","unstructured":"Niebles, J.C., Wang, H., Fei-Fei, L.: Unsupervised learning of human action categories using spatial-temporal words. IJCV (2008)","DOI":"10.1007\/s11263-007-0122-4"},{"key":"28_CR25","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"243","DOI":"10.1007\/978-3-642-33885-4_25","volume-title":"Computer Vision \u2013 ECCV 2012. Workshops and Demonstrations","author":"S. Odashima","year":"2012","unstructured":"Odashima, S., Shimosaka, M., Kaneko, T., Fukui, R., Sato, T.: Collective activity localization with contextual spatial pyramid. In: Fusiello, A., Murino, V., Cucchiara, R. (eds.) ECCV 2012 Ws\/Demos, Part III. LNCS, vol.\u00a07585, pp. 243\u2013252. Springer, Heidelberg (2012)"},{"key":"28_CR26","doi-asserted-by":"crossref","unstructured":"Patron-Perez, A., Marsza\u0142ek, M., Zisserman, A., Reid, I.D.: High five: Recognising human interactions in TV shows. In: BMVC (2010)","DOI":"10.5244\/C.24.50"},{"key":"28_CR27","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"452","DOI":"10.1007\/978-3-642-15549-9_33","volume-title":"Computer Vision \u2013 ECCV 2010","author":"S. Pellegrini","year":"2010","unstructured":"Pellegrini, S., Ess, A., Van Gool, L.: Improving data association by joint modeling of pedestrian trajectories and groupings. In: Daniilidis, K., Maragos, P., Paragios, N. (eds.) ECCV 2010, Part I. LNCS, vol.\u00a06311, pp. 452\u2013465. Springer, Heidelberg (2010)"},{"key":"28_CR28","doi-asserted-by":"crossref","unstructured":"Ryoo, M.S., Aggarwal, J.K.: Spatio-temporal relationship match: Video structure comparison for recognition of complex human activities. In: ICCV (2009)","DOI":"10.1109\/ICCV.2009.5459361"},{"key":"28_CR29","doi-asserted-by":"crossref","unstructured":"Ryoo, M.S., Aggarwal, J.K.: Stochastic representation and recognition of high-level group activities. IJCV (2010)","DOI":"10.1007\/s11263-010-0355-5"},{"key":"28_CR30","unstructured":"Shi, J., Malik, J.: Normalized cuts and image segmentation. PAMI (2000)"},{"issue":"8","key":"28_CR31","doi-asserted-by":"publisher","first-page":"888","DOI":"10.1109\/34.868688","volume":"22","author":"J. Shi","year":"2000","unstructured":"Shi, J., Malik, J.: Normalized cuts and image segmentation. PAMI\u00a022(8), 888\u2013905 (2000)","journal-title":"PAMI"},{"key":"28_CR32","unstructured":"Shotton, J., Winn, J., Rother, C., Criminisi, A.: Textonboost for image understanding: Multi-class object recognition and segmentation by jointly modeling texture, layout, and context. IJCV (2009)"},{"key":"28_CR33","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1007\/978-3-642-33709-3_6","volume-title":"Computer Vision \u2013 ECCV 2012","author":"S. Singh","year":"2012","unstructured":"Singh, S., Gupta, A., Efros, A.A.: Unsupervised discovery of mid-level discriminative patches. In: Fitzgibbon, A., Lazebnik, S., Perona, P., Sato, Y., Schmid, C. (eds.) ECCV 2012, Part II. LNCS, vol.\u00a07573, pp. 73\u201386. Springer, Heidelberg (2012), \n                    \n                      http:\/\/arxiv.org\/abs\/1205.3137"},{"issue":"6","key":"28_CR34","doi-asserted-by":"publisher","first-page":"929","DOI":"10.1109\/TPAMI.2007.1046","volume":"29","author":"R. Unnikrishnan","year":"2007","unstructured":"Unnikrishnan, R., Pantofaru, C., Hebert, M.: Toward objective evaluation of image segmentation algorithms. PAMI\u00a029(6), 929\u2013944 (2007)","journal-title":"PAMI"},{"key":"28_CR35","unstructured":"Yang, Y., Baker, S., Kannan, A., Ramanan, D.: Recognizing proxemics in personal photos. In: CVPR (2012)"},{"key":"28_CR36","doi-asserted-by":"crossref","unstructured":"Yao, A., Gall, J., Van Gool, L.: A hough transform-based voting framework for action recognition. In: CVPR (June 2010)","DOI":"10.1109\/CVPR.2010.5539883"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2014"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-10593-2_28","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,12,2]],"date-time":"2019-12-02T09:23:45Z","timestamp":1575278625000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-10593-2_28"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014]]},"ISBN":["9783319105925","9783319105932"],"references-count":36,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-10593-2_28","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2014]]}}}