{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T16:09:37Z","timestamp":1774541377247,"version":"3.50.1"},"publisher-location":"Cham","reference-count":47,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030585976","type":"print"},{"value":"9783030585983","type":"electronic"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-58598-3_13","type":"book-chapter","created":{"date-parts":[[2020,11,6]],"date-time":"2020-11-06T16:03:52Z","timestamp":1604678632000},"page":"208-224","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":93,"title":["Social Adaptive Module for\u00a0Weakly-Supervised Group Activity Recognition"],"prefix":"10.1007","author":[{"given":"Rui","family":"Yan","sequence":"first","affiliation":[]},{"given":"Lingxi","family":"Xie","sequence":"additional","affiliation":[]},{"given":"Jinhui","family":"Tang","sequence":"additional","affiliation":[]},{"given":"Xiangbo","family":"Shu","sequence":"additional","affiliation":[]},{"given":"Qi","family":"Tian","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,11,7]]},"reference":[{"key":"13_CR1","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1007\/978-3-642-33765-9_14","volume-title":"Computer Vision \u2013 ECCV 2012","author":"MR Amer","year":"2012","unstructured":"Amer, M.R., Xie, D., Zhao, M., Todorovic, S., Zhu, S.-C.: Cost-sensitive top-down\/bottom-up inference for multiscale activity recognition. In: Fitzgibbon, A., Lazebnik, S., Perona, P., Sato, Y., Schmid, C. (eds.) ECCV 2012. LNCS, vol. 7575, pp. 187\u2013200. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-33765-9_14"},{"key":"13_CR2","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"572","DOI":"10.1007\/978-3-319-10599-4_37","volume-title":"Computer Vision \u2013 ECCV 2014","author":"MR Amer","year":"2014","unstructured":"Amer, M.R., Lei, P., Todorovic, S.: HiRF: hierarchical random field for collective activity recognition in videos. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8694, pp. 572\u2013585. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10599-4_37"},{"key":"13_CR3","doi-asserted-by":"crossref","unstructured":"Azar, S.M., Atigh, M.G., Nickabadi, A., Alahi, A.: Convolutional relational machine for group activity recognition. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00808"},{"key":"13_CR4","doi-asserted-by":"crossref","unstructured":"Bagautdinov, T., Alahi, A., Fleuret, F., Fua, P., Savarese, S.: Social scene understanding: end-to-end multi-person action localization and collective activity recognition. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.365"},{"key":"13_CR5","doi-asserted-by":"crossref","unstructured":"Cadene, R., Ben-Younes, H., Cord, M., Thome, N.: MUREL: multimodal relational reasoning for visual question answering. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00209"},{"key":"13_CR6","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? A new model and the kinetics dataset. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"13_CR7","unstructured":"Chen, K., et al.: MMDetection: Open MMLab detection toolbox and benchmark. arXiv preprint arXiv:1906.07155 (2019)"},{"key":"13_CR8","doi-asserted-by":"crossref","unstructured":"Chen, X., Gupta, A.: Spatial memory for context reasoning in object detection. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.440"},{"key":"13_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"215","DOI":"10.1007\/978-3-642-33765-9_16","volume-title":"Computer Vision \u2013 ECCV 2012","author":"W Choi","year":"2012","unstructured":"Choi, W., Savarese, S.: A unified framework for multi-target tracking and collective activity recognition. In: Fitzgibbon, A., Lazebnik, S., Perona, P., Sato, Y., Schmid, C. (eds.) ECCV 2012. LNCS, vol. 7575, pp. 215\u2013230. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-33765-9_16"},{"key":"13_CR10","unstructured":"Choi, W., Shahid, K., Savarese, S.: What are they doing?: collective activity classification using spatio-temporal relationship among people. In: ICCV Workshops (2009)"},{"key":"13_CR11","doi-asserted-by":"crossref","unstructured":"Choi, W., Shahid, K., Savarese, S.: Learning context for collective activity recognition. In: CVPR (2011)","DOI":"10.1109\/CVPR.2011.5995707"},{"key":"13_CR12","doi-asserted-by":"crossref","unstructured":"Danelljan, M., H\u00e4ger, G., Khan, F., Felsberg, M.: Accurate scale estimation for robust visual tracking. In: BMVC (2014)","DOI":"10.5244\/C.28.65"},{"key":"13_CR13","doi-asserted-by":"crossref","unstructured":"Deng, Z., Vahdat, A., Hu, H., Mori, G.: Structure inference machines: recurrent neural networks for analyzing relations in group activity recognition. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.516"},{"key":"13_CR14","doi-asserted-by":"crossref","unstructured":"Gan, C., Wang, N., Yang, Y., Yeung, D.Y., Hauptmann, A.G.: DevNet: a deep event network for multimedia event detection and evidence recounting. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298872"},{"key":"13_CR15","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R.: Mask R-CNN. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.322"},{"key":"13_CR16","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"13_CR17","doi-asserted-by":"crossref","unstructured":"Hu, H., Gu, J., Zhang, Z., Dai, J., Wei, Y.: Relation networks for object detection. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00378"},{"key":"13_CR18","doi-asserted-by":"crossref","unstructured":"Ibrahim, M.S., Muralidharan, S., Deng, Z., Vahdat, A., Mori, G.: A hierarchical deep temporal model for group activity recognition. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.217"},{"key":"13_CR19","doi-asserted-by":"crossref","unstructured":"Ibrahim, M.S., Muralidharan, S., Deng, Z., Vahdat, A., Mori, G.: Hierarchical deep temporal models for group activity recognition. arXiv preprint arXiv:1607.02643 (2016)","DOI":"10.1109\/CVPR.2016.217"},{"key":"13_CR20","doi-asserted-by":"crossref","unstructured":"Jang, Y., Song, Y., Yu, Y., Kim, Y., Kim, G.: TGIF-QA: toward spatio-temporal reasoning in visual question answering. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.149"},{"key":"13_CR21","doi-asserted-by":"crossref","unstructured":"Johnson, J., Krishna, R., Stark, M., Li, L.J., Shamma, D., Bernstein, M., Fei-Fei, L.: Image retrieval using scene graphs. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298990"},{"key":"13_CR22","first-page":"1755","volume":"10","author":"DE King","year":"2009","unstructured":"King, D.E.: Dlib-ml: a machine learning toolkit. JMLR 10, 1755\u20131758 (2009)","journal-title":"JMLR"},{"key":"13_CR23","doi-asserted-by":"crossref","unstructured":"Kuehne, H., Jhuang, H., Garrote, E., Poggio, T., Serre, T.: HMDB: a large video database for human motion recognition. In: ICCV (2011)","DOI":"10.1109\/ICCV.2011.6126543"},{"key":"13_CR24","unstructured":"Lan, T., Sigal, L., Mori, G.: Social roles in hierarchical models for human activity recognition. In: CVPR (2012)"},{"key":"13_CR25","doi-asserted-by":"publisher","first-page":"1549","DOI":"10.1109\/TPAMI.2011.228","volume":"34","author":"T Lan","year":"2012","unstructured":"Lan, T., Wang, Y., Yang, W., Robinovitch, S.N., Mori, G.: Discriminative latent models for recognizing contextual group activities. TPAMI 34, 1549\u20131562 (2012)","journal-title":"TPAMI"},{"key":"13_CR26","doi-asserted-by":"crossref","unstructured":"Li, Y., Ouyang, W., Zhou, B., Wang, K., Wang, X.: Scene graph generation from objects, phrases and caption regions. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.142"},{"key":"13_CR27","doi-asserted-by":"crossref","unstructured":"Lin, J., Gan, C., Han, S.: TSM: temporal shift module for efficient video understanding. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00718"},{"key":"13_CR28","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., Zitnick, C.L.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"13_CR29","doi-asserted-by":"crossref","unstructured":"Liu, X., Lee, J.Y., Jin, H.: Learning video representations from correspondence proposals. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00440"},{"key":"13_CR30","doi-asserted-by":"crossref","unstructured":"Long, X., Gan, C., De Melo, G., Wu, J., Liu, X., Wen, S.: Attention clusters: purely attention based local feature integration for video classification. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00817"},{"key":"13_CR31","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"104","DOI":"10.1007\/978-3-030-01249-6_7","volume-title":"Computer Vision \u2013 ECCV 2018","author":"M Qi","year":"2018","unstructured":"Qi, M., Qin, J., Li, A., Wang, Y., Luo, J., Van Gool, L.: stagNet: an attentive semantic RNN for group activity recognition. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11214, pp. 104\u2013120. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01249-6_7"},{"key":"13_CR32","doi-asserted-by":"crossref","unstructured":"Ramanathan, V., Huang, J., Abu-El-Haija, S., Gorban, A., Murphy, K., Fei-Fei, L.: Detecting events and key actors in multi-person videos. In: CVPR, pp. 3043\u20133053 (2016)","DOI":"10.1109\/CVPR.2016.332"},{"key":"13_CR33","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. In: NeurIPS (2015)"},{"key":"13_CR34","unstructured":"Santoro, A., et al.: A simple neural network module for relational reasoning. In: NeurIPS (2017)"},{"key":"13_CR35","doi-asserted-by":"crossref","unstructured":"Shu, T., Todorovic, S., Zhu, S.C.: CERN: confidence-energy recurrent network for group activity recognition. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.453"},{"key":"13_CR36","unstructured":"Shu, T., Xie, D., Rothrock, B., Todorovic, S., Chun Zhu, S.: Joint inference of groups, events and human roles in aerial videos. In: CVPR (2015)"},{"key":"13_CR37","doi-asserted-by":"crossref","unstructured":"Smith, R.: An overview of the Tesseract OCR engine. In: ICDAR (2007)","DOI":"10.1109\/ICDAR.2007.4376991"},{"key":"13_CR38","unstructured":"Soomro, K., Zamir, A.R., Shah, M.: Ucf101: a dataset of 101 human actions classes from videos in the wild. arXiv preprint arXiv:1212.0402 (2012)"},{"key":"13_CR39","unstructured":"Tang, J., Shu, X., Yan, R., Zhang, L.: Coherence constrained graph LSTM for group activity recognition. TPAMI (2019)"},{"key":"13_CR40","doi-asserted-by":"crossref","unstructured":"Tang, Y., Wang, Z., Li, P., Lu, J., Yang, M., Zhou, J.: Mining semantics-preserving attention for group activity recognition. In: ACM MM (2018)","DOI":"10.1145\/3240508.3240576"},{"key":"13_CR41","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1007\/978-3-319-46484-8_2","volume-title":"Computer Vision \u2013 ECCV 2016","author":"L Wang","year":"2016","unstructured":"Wang, L., Xiong, Y., Wang, Z., Qiao, Yu., Lin, D., Tang, X., Van Gool, L.: Temporal segment networks: towards good practices for deep action recognition. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9912, pp. 20\u201336. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46484-8_2"},{"key":"13_CR42","doi-asserted-by":"crossref","unstructured":"Wang, M., Ni, B., Yang, X.: Recurrent modeling of interaction context for collective activity recognition. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.783"},{"key":"13_CR43","doi-asserted-by":"crossref","unstructured":"Wang, X., Girshick, R., Gupta, A., He, K.: Non-local neural networks. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00813"},{"key":"13_CR44","doi-asserted-by":"crossref","unstructured":"Wu, J., Wang, L., Wang, L., Guo, J., Wu, G.: Learning actor relation graphs for group activity recognition. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.01020"},{"key":"13_CR45","doi-asserted-by":"crossref","unstructured":"Yan, R., Tang, J., Shu, X., Li, Z., Tian, Q.: Participation-contributed temporal dynamic model for group activity recognition. In: ACM MM (2018)","DOI":"10.1145\/3240508.3240572"},{"key":"13_CR46","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"690","DOI":"10.1007\/978-3-030-01246-5_41","volume-title":"Computer Vision \u2013 ECCV 2018","author":"J Yang","year":"2018","unstructured":"Yang, J., Lu, J., Lee, S., Batra, D., Parikh, D.: Graph R-CNN for scene graph generation. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11205, pp. 690\u2013706. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01246-5_41"},{"key":"13_CR47","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"831","DOI":"10.1007\/978-3-030-01246-5_49","volume-title":"Computer Vision \u2013 ECCV 2018","author":"B Zhou","year":"2018","unstructured":"Zhou, B., Andonian, A., Oliva, A., Torralba, A.: Temporal relational reasoning in videos. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11205, pp. 831\u2013846. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01246-5_49"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2020"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-58598-3_13","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,6]],"date-time":"2024-11-06T00:26:47Z","timestamp":1730852807000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-58598-3_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030585976","9783030585983"],"references-count":47,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-58598-3_13","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"7 November 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Glasgow","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 August 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2020.eu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"OpenReview","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5025","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1360","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"27% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}