{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,8]],"date-time":"2025-07-08T05:16:44Z","timestamp":1751951804847,"version":"3.40.3"},"publisher-location":"Cham","reference-count":53,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031783531"},{"type":"electronic","value":"9783031783548"}],"license":[{"start":{"date-parts":[[2024,12,4]],"date-time":"2024-12-04T00:00:00Z","timestamp":1733270400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,4]],"date-time":"2024-12-04T00:00:00Z","timestamp":1733270400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-78354-8_19","type":"book-chapter","created":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T10:28:55Z","timestamp":1733221735000},"page":"293-309","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Self-supervised Multi-actor Social Activity Understanding in Streaming Videos"],"prefix":"10.1007","author":[{"given":"Shubham","family":"Trehan","sequence":"first","affiliation":[]},{"given":"Sathyanarayanan N.","family":"Aakur","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,12,4]]},"reference":[{"key":"19_CR1","doi-asserted-by":"crossref","unstructured":"Aakur, S., Sarkar, S.: Action localization through continual predictive learning. In: ECCV. pp. 300\u2013317 (2020)","DOI":"10.1007\/978-3-030-58568-6_18"},{"key":"19_CR2","doi-asserted-by":"crossref","unstructured":"Aakur, S., Sarkar, S.: Actor-centered representations for action localization in streaming videos. In: ECCV. pp. 70\u201387 (2022)","DOI":"10.1007\/978-3-031-19839-7_5"},{"key":"19_CR3","doi-asserted-by":"crossref","unstructured":"Aakur, S.N., Sarkar, S.: A perceptual prediction framework for self supervised event segmentation. In: CVPR. pp. 1197\u20131206 (2019)","DOI":"10.1109\/CVPR.2019.00129"},{"key":"19_CR4","doi-asserted-by":"crossref","unstructured":"Azar, S.M., Atigh, M.G., Nickabadi, A., Alahi, A.: Convolutional relational machine for group activity recognition. In: CVPR. pp. 7892\u20137901 (2019)","DOI":"10.1109\/CVPR.2019.00808"},{"key":"19_CR5","doi-asserted-by":"crossref","unstructured":"Bal, A.B., Mounir, R., Aakur, S., Sarkar, S., Srivastava, A.: Bayesian tracking of video graphs using joint kalman smoothing and registration. In: ECCV. pp. 440\u2013456 (2022)","DOI":"10.1007\/978-3-031-19833-5_26"},{"key":"19_CR6","doi-asserted-by":"crossref","unstructured":"Bian, C., Feng, W., Wang, S.: Self-supervised representation learning for skeleton-based group activity recognition. In: ACM MM (2022)","DOI":"10.1145\/3503161.3547822"},{"key":"19_CR7","doi-asserted-by":"crossref","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: ECCV. pp. 213\u2013229 (2020)","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"19_CR8","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? a new model and the kinetics dataset. In: CVPR. pp. 6299\u20136308 (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"19_CR9","doi-asserted-by":"crossref","unstructured":"Choi, W., Shahid, K., Savarese, S.: What are they doing?: Collective activity classification using spatio-temporal relationship among people. In: ICCV Workshops. pp. 1282\u20131289 (2009)","DOI":"10.1109\/ICCVW.2009.5457461"},{"key":"19_CR10","doi-asserted-by":"crossref","unstructured":"Ehsanpour, M., Abedin, A., Saleh, F., Shi, J., Reid, I., Rezatofighi, H.: Joint learning of social groups, individuals action and sub-group activities in videos. In: ECCV. pp. 177\u2013195 (2020)","DOI":"10.1007\/978-3-030-58545-7_11"},{"key":"19_CR11","doi-asserted-by":"crossref","unstructured":"Gavrilyuk, K., Sanford, R., Javan, M., Snoek, C.G.: Actor-transformers for group activity recognition. In: CVPR. pp. 839\u2013848 (2020)","DOI":"10.1109\/CVPR42600.2020.00092"},{"key":"19_CR12","doi-asserted-by":"crossref","unstructured":"Han, M., Zhang, D.J., Wang, Y., Yan, R., Yao, L., Chang, X., Qiao, Y.: Dual-ai: Dual-path actor interaction learning for group activity recognition. In: CVPR. pp. 2990\u20132999 (2022)","DOI":"10.1109\/CVPR52688.2022.00300"},{"key":"19_CR13","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: CVPR. pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"19_CR14","doi-asserted-by":"crossref","unstructured":"Hu, G., Cui, B., He, Y., Yu, S.: Progressive relation learning for group activity recognition. In: CVPR (June 2020)","DOI":"10.1109\/CVPR42600.2020.00106"},{"key":"19_CR15","doi-asserted-by":"crossref","unstructured":"Ibrahim, M.S., Muralidharan, S., Deng, Z., Vahdat, A., Mori, G.: A hierarchical deep temporal model for group activity recognition. In: CVPR (June 2016)","DOI":"10.1109\/CVPR.2016.217"},{"key":"19_CR16","doi-asserted-by":"crossref","unstructured":"Jhuang, H., Gall, J., Zuffi, S., Schmid, C., Black, M.J.: Towards understanding action recognition. In: ICCV. pp. 3192\u20133199 (Dec 2013)","DOI":"10.1109\/ICCV.2013.396"},{"key":"19_CR17","unstructured":"Jiang, Y.G., Liu, J., Zamir, A.R., Toderici, G., Laptev, I., Shah, M., Sukthankar, R.: Thumos challenge: Action recognition with a large number of classes (2014)"},{"key":"19_CR18","doi-asserted-by":"crossref","unstructured":"Kim, D., Lee, J., Cho, M., Kwak, S.: Detector-free weakly supervised group activity recognition. In: CVPR. pp. 20083\u201320093 (2022)","DOI":"10.1109\/CVPR52688.2022.01945"},{"key":"19_CR19","doi-asserted-by":"crossref","unstructured":"Kim, J., Lee, M., Heo, J.P.: Self-feedback detr for temporal action detection. In: ICCV. pp. 10286\u201310296 (2023)","DOI":"10.1109\/ICCV51070.2023.00944"},{"issue":"9","key":"19_CR20","first-page":"6086","volume":"32","author":"L Kong","year":"2022","unstructured":"Kong, L., Pei, D., He, R., Huang, D., Wang, Y.: Spatio-temporal player relation modeling for tactic recognition in sports videos. IEEE T-CSVT 32(9), 6086\u20136099 (2022)","journal-title":"IEEE T-CSVT"},{"key":"19_CR21","doi-asserted-by":"crossref","unstructured":"Kong, L., Qin, J., Huang, D., Wang, Y., Van\u00a0Gool, L.: Hierarchical attention and context modeling for group activity recognition. In: ICASSP. pp. 1328\u20131332 (2018)","DOI":"10.1109\/ICASSP.2018.8461770"},{"key":"19_CR22","doi-asserted-by":"crossref","unstructured":"Li, S., Cao, Q., Liu, L., Yang, K., Liu, S., Hou, J., Yi, S.: Groupformer: Group activity recognition with clustered spatial-temporal transformer. In: ICCV. pp. 13668\u201313677 (2021)","DOI":"10.1109\/ICCV48922.2021.01341"},{"key":"19_CR23","doi-asserted-by":"crossref","unstructured":"Li, X., Choo\u00a0Chuah, M.: Sbgar: Semantics based group activity recognition. In: ICCV. pp. 2876\u20132885 (2017)","DOI":"10.1109\/ICCV.2017.313"},{"key":"19_CR24","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., Zitnick, C.L.: Microsoft coco: Common objects in context. In: ECCV. pp. 740\u2013755 (2014)","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"19_CR25","doi-asserted-by":"crossref","unstructured":"Liu, W., Anguelov, D., Erhan, D., Szegedy, C., Reed, S., Fu, C.Y., Berg, A.C.: Ssd: Single shot multibox detector. In: ECCV. pp. 21\u201337 (2016)","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"19_CR26","unstructured":"Lotter, W., Kreiman, G., Cox, D.: Deep predictive coding networks for video prediction and unsupervised learning. arXiv preprint arXiv:1605.08104 (2016)"},{"issue":"2","key":"19_CR27","first-page":"524","volume":"22","author":"L Lu","year":"2020","unstructured":"Lu, L., Lu, Y., Yu, R., Di, H., Zhang, L., Wang, S.: Gaim: Graph attention interaction model for collective activity recognition. IEEE T-MM 22(2), 524\u2013539 (2020)","journal-title":"IEEE T-MM"},{"key":"19_CR28","unstructured":"Mounir, R., Vijayaraghavan, S., Sarkar, S.: Streamer: Streaming representation learning and event segmentation in a hierarchical manner. NeurIPS 36 (2024)"},{"key":"19_CR29","doi-asserted-by":"crossref","unstructured":"Pramono, R.R.A., Chen, Y.T., Fang, W.H.: Empowering relational network by self-attention augmented conditional random fields for group activity recognition. In: ECCV. pp. 71\u201390 (2020)","DOI":"10.1007\/978-3-030-58452-8_5"},{"issue":"2","key":"19_CR30","first-page":"549","volume":"30","author":"M Qi","year":"2019","unstructured":"Qi, M., Wang, Y., Qin, J., Li, A., Luo, J., Van Gool, L.: Stagnet: An attentive semantic rnn for group activity and individual action recognition. IEEE T-CSVT 30(2), 549\u2013565 (2019)","journal-title":"IEEE T-CSVT"},{"issue":"2","key":"19_CR31","first-page":"549","volume":"30","author":"M Qi","year":"2020","unstructured":"Qi, M., Wang, Y., Qin, J., Li, A., Luo, J., Van Gool, L.: stagnet: An attentive semantic rnn for group activity and individual action recognition. IEEE T-CSVT 30(2), 549\u2013565 (2020)","journal-title":"IEEE T-CSVT"},{"key":"19_CR32","doi-asserted-by":"crossref","unstructured":"Raviteja\u00a0Chappa, N.V., Nguyen, P., Nelson, A.H., Seo, H.S., Li, X., Dobbs, P.D., Luu, K.: Sogar: Self-supervised spatiotemporal attention-based social group activity recognition. arXiv e-prints pp. arXiv\u20132305 (2023)","DOI":"10.2139\/ssrn.4504147"},{"key":"19_CR33","doi-asserted-by":"crossref","unstructured":"Shu, T., Todorovic, S., Zhu, S.C.: Cern: confidence-energy recurrent network for group activity recognition. In: CVPR. pp. 5523\u20135531 (2017)","DOI":"10.1109\/CVPR.2017.453"},{"key":"19_CR34","doi-asserted-by":"crossref","unstructured":"Shu, X., Zhang, L., Sun, Y., Tang, J.: Host\u2013parasite: Graph lstm-in-lstm for group activity recognition. IEEE TNNLS (2021)","DOI":"10.1109\/TNNLS.2020.2978942"},{"key":"19_CR35","doi-asserted-by":"crossref","unstructured":"Soomro, K., Shah, M.: Unsupervised action discovery and localization in videos. In: ICCV. pp. 696\u2013705 (2017)","DOI":"10.1109\/ICCV.2017.82"},{"key":"19_CR36","doi-asserted-by":"crossref","unstructured":"Soomro, K., Zamir, A.R.: Action recognition in realistic sports videos. In: Computer Vision in Sports, pp. 181\u2013208. Springer (2015)","DOI":"10.1007\/978-3-319-09396-3_9"},{"key":"19_CR37","doi-asserted-by":"crossref","unstructured":"Tamura, M., Vishwakarma, R., Vennelakanti, R.: Hunting group clues with transformers for social group activity recognition. In: ECCV. pp. 19\u201335 (2022)","DOI":"10.1007\/978-3-031-19772-7_2"},{"issue":"2","key":"19_CR38","doi-asserted-by":"publisher","first-page":"636","DOI":"10.1109\/TPAMI.2019.2928540","volume":"44","author":"J Tang","year":"2019","unstructured":"Tang, J., Shu, X., Yan, R., Zhang, L.: Coherence constrained graph lstm for group activity recognition. IEEE T-PAMI 44(2), 636\u2013647 (2019)","journal-title":"IEEE T-PAMI"},{"issue":"10","key":"19_CR39","doi-asserted-by":"publisher","first-page":"4997","DOI":"10.1109\/TIP.2019.2914577","volume":"28","author":"Y Tang","year":"2019","unstructured":"Tang, Y., Lu, J., Wang, Z., Yang, M., Zhou, J.: Learning semantics-preserving attention and contextual interaction for group activity recognition. IEEE T-IP 28(10), 4997\u20135012 (2019)","journal-title":"IEEE T-IP"},{"key":"19_CR40","unstructured":"Tarashima, S.: One-shot deep model for end-to-end multi-person activity recognition. In: BMVC (2021)"},{"key":"19_CR41","doi-asserted-by":"crossref","unstructured":"Thilakarathne, H., Nibali, A., He, Z., Morgan, S.: Group activity recognition using unreliable tracked pose. arXiv preprint arXiv:2401.03262 (2024)","DOI":"10.1007\/s00521-024-10470-1"},{"key":"19_CR42","doi-asserted-by":"crossref","unstructured":"Trehan, S., Aakur, S.N.: Towards active vision for action localization with reactive control and predictive learning. In: WACV. pp. 783\u2013792 (2022)","DOI":"10.1109\/WACV51458.2022.00345"},{"key":"19_CR43","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. NeurIPS 30 (2017)"},{"key":"19_CR44","doi-asserted-by":"crossref","unstructured":"Wang, M., Ni, B., Yang, X.: Recurrent modeling of interaction context for collective activity recognition. In: CVPR. pp. 3048\u20133056 (2017)","DOI":"10.1109\/CVPR.2017.783"},{"key":"19_CR45","doi-asserted-by":"crossref","unstructured":"Wu, J., Wang, L., Wang, L., Guo, J., Wu, G.: Learning actor relation graphs for group activity recognition. In: CVPR. pp. 9964\u20139974 (2019)","DOI":"10.1109\/CVPR.2019.01020"},{"key":"19_CR46","doi-asserted-by":"crossref","unstructured":"Wu, L., Lang, X., Xiang, Y., Chen, C., Li, Z., Wang, Z.: Active spatial positions based hierarchical relation inference for group activity recognition. IEEE T-CSVT (2022)","DOI":"10.1109\/TCSVT.2022.3228731"},{"key":"19_CR47","doi-asserted-by":"crossref","unstructured":"Xie, Z., Jiao, C., Wu, K., Guo, D., Hong, R.: Active factor graph network for group activity recognition. IEEE T-IP (2024)","DOI":"10.1109\/TIP.2024.3362140"},{"issue":"6","key":"19_CR48","doi-asserted-by":"publisher","first-page":"6955","DOI":"10.1109\/TPAMI.2020.3034233","volume":"45","author":"R Yan","year":"2020","unstructured":"Yan, R., Xie, L., Tang, J., Shu, X., Tian, Q.: Higcin: Hierarchical graph-based cross inference network for group activity recognition. IEEE T-PAMI 45(6), 6955\u20136968 (2020)","journal-title":"IEEE T-PAMI"},{"key":"19_CR49","doi-asserted-by":"crossref","unstructured":"Yan, R., Xie, L., Tang, J., Shu, X., Tian, Q.: Social adaptive module for weakly-supervised group activity recognition. In: ECCV. pp. 208\u2013224 (2020)","DOI":"10.1007\/978-3-030-58598-3_13"},{"key":"19_CR50","doi-asserted-by":"crossref","unstructured":"Yuan, H., Ni, D.: Learning visual context for group activity recognition. In: AAAI Conference on Artificial Intelligence. vol.\u00a035, pp. 3261\u20133269 (2021)","DOI":"10.1609\/aaai.v35i4.16437"},{"issue":"1","key":"19_CR51","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1037\/0096-3445.130.1.29","volume":"130","author":"JM Zacks","year":"2001","unstructured":"Zacks, J.M., Tversky, B., Iyer, G.: Perceiving, remembering, and communicating structure in events. J. Exp. Psychol. Gen. 130(1), 29 (2001)","journal-title":"J. Exp. Psychol. Gen."},{"key":"19_CR52","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1109\/TIP.2019.2918725","volume":"29","author":"P Zhang","year":"2019","unstructured":"Zhang, P., Tang, Y., Hu, J.F., Zheng, W.S.: Fast collective activity recognition under weak supervision. IEEE T-IP 29, 29\u201343 (2019)","journal-title":"IEEE T-IP"},{"key":"19_CR53","doi-asserted-by":"crossref","unstructured":"Zhou, H., Kadav, A., Shamsian, A., Geng, S., Lai, F., Zhao, L., Liu, T., Kapadia, M., Graf, H.P.: Composer: compositional reasoning of group activity in videos with keypoint-only modality. In: ECCV. pp. 249\u2013266 (2022)","DOI":"10.1007\/978-3-031-19833-5_15"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-78354-8_19","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T11:31:01Z","timestamp":1733225461000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-78354-8_19"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,4]]},"ISBN":["9783031783531","9783031783548"],"references-count":53,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-78354-8_19","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,12,4]]},"assertion":[{"value":"4 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICPR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Pattern Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Kolkata","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"India","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 December 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 December 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icpr2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icpr2024.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}