{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,16]],"date-time":"2026-05-16T01:00:08Z","timestamp":1778893208812,"version":"3.51.4"},"publisher-location":"Cham","reference-count":52,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031197710","type":"print"},{"value":"9783031197727","type":"electronic"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-19772-7_2","type":"book-chapter","created":{"date-parts":[[2022,10,27]],"date-time":"2022-10-27T22:09:58Z","timestamp":1666908598000},"page":"19-35","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":35,"title":["Hunting Group Clues with\u00a0Transformers for\u00a0Social Group Activity Recognition"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1029-5271","authenticated-orcid":false,"given":"Masato","family":"Tamura","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8874-6816","authenticated-orcid":false,"given":"Rahul","family":"Vishwakarma","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ravigopal","family":"Vennelakanti","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,10,28]]},"reference":[{"key":"2_CR1","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"572","DOI":"10.1007\/978-3-319-10599-4_37","volume-title":"Computer Vision \u2013 ECCV 2014","author":"MR Amer","year":"2014","unstructured":"Amer, M.R., Lei, P., Todorovic, S.: HiRF: hierarchical random field for collective activity recognition in videos. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8694, pp. 572\u2013585. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10599-4_37"},{"issue":"4","key":"2_CR2","doi-asserted-by":"publisher","first-page":"800","DOI":"10.1109\/TPAMI.2015.2465955","volume":"38","author":"MR Amer","year":"2016","unstructured":"Amer, M.R., Todorovic, S.: Sum product networks for activity recognition. IEEE TPAMI 38(4), 800\u2013813 (2016)","journal-title":"IEEE TPAMI"},{"key":"2_CR3","doi-asserted-by":"crossref","unstructured":"Amer, M.R., Todorovic, S., Fern, A., Zhu, S.C.: Monte Carlo tree search for scheduling activity recognition. In: ICCV, December 2013","DOI":"10.1109\/ICCV.2013.171"},{"key":"2_CR4","doi-asserted-by":"crossref","unstructured":"Azar, S.M., Atigh, M.G., Nickabadi, A., Alahi, A.: Convolutional relational machine for group activity recognition. In: CVPR, June 2019","DOI":"10.1109\/CVPR.2019.00808"},{"key":"2_CR5","doi-asserted-by":"crossref","unstructured":"Bagautdinov, T.M., Alahi, A., Fleuret, F., Fua, P.V., Savarese, S.: Social scene understanding: end-to-end multi-person action localization and collective activity recognition. In: CVPR, July 2017","DOI":"10.1109\/CVPR.2017.365"},{"key":"2_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1007\/978-3-030-58452-8_13","volume-title":"Computer Vision \u2013 ECCV 2020","author":"N Carion","year":"2020","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12346, pp. 213\u2013229. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_13"},{"key":"2_CR7","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? A new model and the kinetics dataset. In: CVPR, July 2017","DOI":"10.1109\/CVPR.2017.502"},{"key":"2_CR8","doi-asserted-by":"crossref","unstructured":"Cho, K., et al.: Learning phrase representations using RNN encoder-decoder for statistical machine translation. In: EMNLP, October 2014","DOI":"10.3115\/v1\/D14-1179"},{"key":"2_CR9","unstructured":"Choi, W., Shahid, K., Savarese, S.: What are they doing?: collective activity classification using spatio-temporal relationship among people. In: ICCVW, September 2009"},{"key":"2_CR10","doi-asserted-by":"crossref","unstructured":"Dai, J., et al.: Deformable convolutional networks. In: ICCV, October 2017","DOI":"10.1109\/ICCV.2017.89"},{"key":"2_CR11","doi-asserted-by":"crossref","unstructured":"Dai, X., Chen, Y., Yang, J., Zhang, P., Yuan, L., Zhang, L.: Dynamic DETR: end-to-end object detection with dynamic attention. In: ICCV, October 2021","DOI":"10.1109\/ICCV48922.2021.00298"},{"key":"2_CR12","doi-asserted-by":"crossref","unstructured":"Deng, Z., Vahdat, A., Hu, H., Mori, G.: Structure inference machines: recurrent neural networks for analyzing relations in group activity recognition. In: CVPR, June 2016","DOI":"10.1109\/CVPR.2016.516"},{"key":"2_CR13","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"177","DOI":"10.1007\/978-3-030-58545-7_11","volume-title":"Computer Vision \u2013 ECCV 2020","author":"M Ehsanpour","year":"2020","unstructured":"Ehsanpour, M., Abedin, A., Saleh, F., Shi, J., Reid, I., Rezatofighi, H.: Joint learning of social groups, individuals action and sub-group activities in videos. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12354, pp. 177\u2013195. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58545-7_11"},{"key":"2_CR14","doi-asserted-by":"crossref","unstructured":"Gavrilyuk, K., Sanford, R., Javan, M., Snoek, C.G.M.: Actor-transformers for group activity recognition. In: CVPR, June 2020","DOI":"10.1109\/CVPR42600.2020.00092"},{"issue":"5","key":"2_CR15","doi-asserted-by":"publisher","first-page":"1003","DOI":"10.1109\/TPAMI.2011.176","volume":"34","author":"W Ge","year":"2012","unstructured":"Ge, W., Collins, R.T., Ruback, R.B.: Vision-based analysis of small groups in pedestrian crowds. IEEE TPAMI 34(5), 1003\u20131016 (2012)","journal-title":"IEEE TPAMI"},{"issue":"8","key":"2_CR16","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"2_CR17","doi-asserted-by":"crossref","unstructured":"Hu, G., Cui, B., He, Y., Yu, S.: Progressive relation learning for group activity recognition. In: CVPR, June 2020","DOI":"10.1109\/CVPR42600.2020.00106"},{"key":"2_CR18","doi-asserted-by":"crossref","unstructured":"Ibrahim, M.S., Mori, G.: Hierarchical relational networks for group activity recognition and retrieval. In: ECCV, September 2018","DOI":"10.1007\/978-3-030-01219-9_44"},{"key":"2_CR19","doi-asserted-by":"crossref","unstructured":"Ibrahim, M.S., Muralidharan, S., Deng, Z., Vahdat, A., Mori, G.: A hierarchical deep temporal model for group activity recognition. In: CVPR, June 2016","DOI":"10.1109\/CVPR.2016.217"},{"key":"2_CR20","unstructured":"Kay, W., et al.: The kinetics human action video dataset, May 2017. arXiv:1705.06950"},{"key":"2_CR21","unstructured":"Kipf, T.N., Welling, M.: Semi-supervised classification with graph convolutional networks. In: ICLR, April 2017"},{"key":"2_CR22","doi-asserted-by":"crossref","unstructured":"Kong, L., Qin, J., Huang, D., Wang, Y., Gool, L.V.: Hierarchical attention and context modeling for group activity recognition. In: ICASSP, April 2018","DOI":"10.1109\/ICASSP.2018.8461770"},{"key":"2_CR23","doi-asserted-by":"publisher","first-page":"83","DOI":"10.1002\/nav.3800020109","volume":"2","author":"HW Kuhn","year":"1955","unstructured":"Kuhn, H.W., Yaw, B.: The Hungarian method for the assignment problem. Naval Res. Logist. Q. 2, 83\u201397 (1955)","journal-title":"Naval Res. Logist. Q."},{"key":"2_CR24","unstructured":"Lan, T., Sigal, L., Mori, G.: Social roles in hierarchical models for human activity recognition. In: CVPR, June 2012"},{"key":"2_CR25","unstructured":"Lan, T., Wang, Y., Yang, W., Mori, G.: Beyond actions: discriminative models for contextual group activities. In: NIPS, December 2010"},{"issue":"8","key":"2_CR26","doi-asserted-by":"publisher","first-page":"1549","DOI":"10.1109\/TPAMI.2011.228","volume":"34","author":"T Lan","year":"2012","unstructured":"Lan, T., Wang, Y., Yang, W., Robinovitch, S.N., Mori, G.: Discriminative latent models for recognizing contextual group activities. IEEE TPAMI 34(8), 1549\u20131562 (2012)","journal-title":"IEEE TPAMI"},{"key":"2_CR27","doi-asserted-by":"crossref","unstructured":"Li, S., et al.: GroupFormer: group activity recognition with clustered spatial-temporal transformer. In: ICCV, October 2021","DOI":"10.1109\/ICCV48922.2021.01341"},{"key":"2_CR28","doi-asserted-by":"crossref","unstructured":"Li, X., Chuah, M.C.: SBGAR: semantics based group activity recognition. In: ICCV, October 2017","DOI":"10.1109\/ICCV.2017.313"},{"key":"2_CR29","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Goyal, P., Girshick, R., He, K., Doll\u00e1r, P.: Focal loss for dense object detection. In: ICCV, October 2017","DOI":"10.1109\/ICCV.2017.324"},{"key":"2_CR30","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"2_CR31","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: ICLR, May 2019"},{"key":"2_CR32","unstructured":"Ng, A., Jordan, M., Weiss, Y.: On spectral clustering: analysis and an algorithm. In: NIPS, December 2002"},{"key":"2_CR33","doi-asserted-by":"crossref","unstructured":"Park, H., Shi, J.: Social saliency prediction. In: CVPR, June 2015","DOI":"10.1109\/CVPR.2015.7299110"},{"key":"2_CR34","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1007\/978-3-030-58452-8_5","volume-title":"Computer Vision \u2013 ECCV 2020","author":"RRA Pramono","year":"2020","unstructured":"Pramono, R.R.A., Chen, Y.T., Fang, W.H.: Empowering relational network by self-attention augmented conditional random fields for group activity recognition. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12346, pp. 71\u201390. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_5"},{"key":"2_CR35","doi-asserted-by":"crossref","unstructured":"Qi, M., Qin, J., Li, A., Wang, Y., Luo, J., Gool, L.V.: stagNet: an attentive semantic RNN for group activity recognition. In: ECCV, September 2018","DOI":"10.1007\/978-3-030-01249-6_7"},{"key":"2_CR36","doi-asserted-by":"crossref","unstructured":"Sendo, K., Ukita, N.: Heatmapping of people involved in group activities. In: MVA, May 2019","DOI":"10.23919\/MVA.2019.8757971"},{"key":"2_CR37","doi-asserted-by":"crossref","unstructured":"Shu, T., Todorovic, S., Zhu, S.C.: CERN: confidence-energy recurrent network for group activity recognition. In: CVPR, July 2017","DOI":"10.1109\/CVPR.2017.453"},{"key":"2_CR38","unstructured":"Shu, T., Xie, D., Rothrock, B., Todorovic, S., Zhu, S.: Joint inference of groups, events and human roles in aerial videos. In: CVPR, June 2015"},{"key":"2_CR39","doi-asserted-by":"crossref","unstructured":"Sun, Z., Cao, S., Yang, Y., Kitani, K.M.: Rethinking transformer-based set prediction for object detection. In: ICCV, October 2021","DOI":"10.1109\/ICCV48922.2021.00359"},{"issue":"2","key":"2_CR40","doi-asserted-by":"publisher","first-page":"636","DOI":"10.1109\/TPAMI.2019.2928540","volume":"44","author":"J Tang","year":"2022","unstructured":"Tang, J., Shu, X., Yan, R., Zhang, L.: Coherence constrained graph LSTM for group activity recognition. IEEE TPAMI 44(2), 636\u2013647 (2022)","journal-title":"IEEE TPAMI"},{"key":"2_CR41","unstructured":"Vaswani, A., et al.: Attention is all you need. In: NIPS, December 2017"},{"key":"2_CR42","unstructured":"Veli\u010dkovi\u010d, P., Cucurull, G., Casanova, A., Romero, A., Li\u00f3, P., Bengio, Y.: Graph attention networks. In: ICLR, April 2018"},{"key":"2_CR43","doi-asserted-by":"crossref","unstructured":"Wang, M., Ni, B., Yang, X.: Recurrent modeling of interaction context for collective activity recognition. In: CVPR, July 2017","DOI":"10.1109\/CVPR.2017.783"},{"key":"2_CR44","doi-asserted-by":"crossref","unstructured":"Wu, J., Wang, L., Wang, L., Guo, J., Wu, G.: Learning actor relation graphs for group activity recognition. In: CVPR, June 2019","DOI":"10.1109\/CVPR.2019.01020"},{"key":"2_CR45","doi-asserted-by":"crossref","unstructured":"Yan, R., Shu, X., Yuan, C., Tian, Q., Tang, J.: Position-aware participation-contributed temporal dynamic model for group activity recognition. IEEE TNNLS (2021)","DOI":"10.1109\/TNNLS.2021.3085567"},{"key":"2_CR46","doi-asserted-by":"crossref","unstructured":"Yan, R., Tang, J., Shu, X., Li, Z., Tian, Q.: Participation-contributed temporal dynamic model for group activity recognition. In: ACMMM, October 2018","DOI":"10.1145\/3240508.3240572"},{"key":"2_CR47","unstructured":"Yan, R., Xie, L., Tang, J., Shu, X., Tian, Q.: HiGCIN: hierarchical graph-based cross inference network for group activity recognition. IEEE TPAMI (2020)"},{"key":"2_CR48","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"208","DOI":"10.1007\/978-3-030-58598-3_13","volume-title":"Computer Vision \u2013 ECCV 2020","author":"R Yan","year":"2020","unstructured":"Yan, R., Xie, L., Tang, J., Shu, X., Tian, Q.: Social adaptive module for\u00a0weakly-supervised group activity recognition. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12353, pp. 208\u2013224. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58598-3_13"},{"key":"2_CR49","doi-asserted-by":"crossref","unstructured":"Yuan, H., Ni, D., Wang, M.: Spatio-temporal dynamic inference network for group activity recognition. In: ICCV, October 2021","DOI":"10.1109\/ICCV48922.2021.00738"},{"key":"2_CR50","doi-asserted-by":"crossref","unstructured":"Zhou, H., et al.: COMPOSER: compositional learning of group activity in videos, December 2021. arXiv:2112.05892","DOI":"10.1007\/978-3-031-19833-5_15"},{"key":"2_CR51","unstructured":"Zhou, X., Wang, D., Kr\u00e4henb\u00fchl, P.: Objects as points, April 2019. arXiv:1904.07850"},{"key":"2_CR52","unstructured":"Zhu, X., Su, W., Lu, L., Li, B., Wang, X., Dai, J.: Deformable DETR: deformable transformers for end-to-end object detection. In: ICLR, May 2021"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2022"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-19772-7_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,12]],"date-time":"2024-03-12T16:47:02Z","timestamp":1710262022000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-19772-7_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031197710","9783031197727"],"references-count":52,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-19772-7_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"28 October 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tel Aviv","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Israel","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 October 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 October 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2022.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5804","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1645","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"28% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.21","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.91","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}