{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,28]],"date-time":"2026-05-28T02:20:11Z","timestamp":1779934811460,"version":"3.53.1"},"reference-count":48,"publisher":"Springer Science and Business Media LLC","issue":"10","license":[{"start":{"date-parts":[[2024,5,8]],"date-time":"2024-05-08T00:00:00Z","timestamp":1715126400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,5,8]],"date-time":"2024-05-08T00:00:00Z","timestamp":1715126400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2024,10]]},"DOI":"10.1007\/s11263-024-02082-y","type":"journal-article","created":{"date-parts":[[2024,5,8]],"date-time":"2024-05-08T14:02:26Z","timestamp":1715176946000},"page":"4269-4288","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["Design and Analysis of Efficient Attention in Transformers for Social Group Activity Recognition"],"prefix":"10.1007","volume":"132","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1029-5271","authenticated-orcid":false,"given":"Masato","family":"Tamura","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,5,8]]},"reference":[{"key":"2082_CR1","doi-asserted-by":"crossref","unstructured":"Amer, M. R., Lei, P., Todorovic, S. (2014) HiRF: Hierarchical random field for collective activity recognition in videos. In: ECCV","DOI":"10.1007\/978-3-319-10599-4_37"},{"issue":"4","key":"2082_CR2","doi-asserted-by":"publisher","first-page":"800","DOI":"10.1109\/TPAMI.2015.2465955","volume":"38","author":"MR Amer","year":"2016","unstructured":"Amer, M. R., & Todorovic, S. (2016). Sum product networks for activity recognition. IEEE TPAMI, 38(4), 800\u2013813.","journal-title":"IEEE TPAMI"},{"key":"2082_CR3","doi-asserted-by":"crossref","unstructured":"Amer, M. R., Todorovic, S., Fern, A., Zhu, S. C. (2013) Monte carlo tree search for scheduling activity recognition. In: ICCV","DOI":"10.1109\/ICCV.2013.171"},{"key":"2082_CR4","doi-asserted-by":"crossref","unstructured":"Azar, S. M., Atigh, M. G., Nickabadi, A., Alahi, A. (2019) Convolutional relational machine for group activity recognition. In: CVPR","DOI":"10.1109\/CVPR.2019.00808"},{"key":"2082_CR5","doi-asserted-by":"crossref","unstructured":"Bagautdinov, T. M., Alahi, A., Fleuret, F., Fua, P. V., Savarese, S. (2017) Social scene understanding: End-to-end multi-person action localization and collective activity recognition. In: CVPR","DOI":"10.1109\/CVPR.2017.365"},{"key":"2082_CR6","unstructured":"Bertasius, G., Wang, H., Torresani, L. (2021) Is space-time attention all you need for video understanding? In: ICML"},{"key":"2082_CR7","doi-asserted-by":"crossref","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S. (2020) End-to-end object detection with transformers. In: ECCV","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"2082_CR8","doi-asserted-by":"crossref","unstructured":"Carreira, J., & Zisserman, A. (2017). Quo vadis, action recognition? A new model and the kinetics dataset. In: CVPR.","DOI":"10.1109\/CVPR.2017.502"},{"key":"2082_CR9","doi-asserted-by":"crossref","unstructured":"Choi, W., Chao, Y. W., Pantofaru, C., Savarese, S. (2014) Discovering groups of people in images. In: ECCV","DOI":"10.1007\/978-3-319-10593-2_28"},{"key":"2082_CR10","unstructured":"Choi, W., Shahid, K., Savarese, S. (2009) What are they doing? : Collective activity classification using spatio-temporal relationship among people. In: ICCVW"},{"key":"2082_CR11","doi-asserted-by":"crossref","unstructured":"Dai, J., Qi, H., Xiong, Y., Li, Y., Zhang, G., Hu, H., Wei, Y. (2017) Deformable convolutional networks. In: ICCV","DOI":"10.1109\/ICCV.2017.89"},{"key":"2082_CR12","doi-asserted-by":"crossref","unstructured":"Deng, Z., Vahdat, A., Hu, H., Mori, G. (2016) Structure inference machines: Recurrent neural networks for analyzing relations in group activity recognition. In: CVPR","DOI":"10.1109\/CVPR.2016.516"},{"key":"2082_CR13","doi-asserted-by":"crossref","unstructured":"Ehsanpour, M., Abedin, A., Saleh, F., Shi, J., Reid, I., Rezatofighi, H. (2020) Joint learning of social groups, individuals action and sub-group activities in videos. In: ECCV","DOI":"10.1007\/978-3-030-58545-7_11"},{"key":"2082_CR14","doi-asserted-by":"crossref","unstructured":"Gavrilyuk, K., Sanford, R., Javan, M., Snoek, C. G. M. (2020) Actor-transformers for group activity recognition. In: CVPR","DOI":"10.1109\/CVPR42600.2020.00092"},{"issue":"5","key":"2082_CR15","doi-asserted-by":"publisher","first-page":"1003","DOI":"10.1109\/TPAMI.2011.176","volume":"34","author":"W Ge","year":"2012","unstructured":"Ge, W., Collins, R. T., & Ruback, R. B. (2012). Vision-based analysis of small groups in pedestrian crowds. IEEE TPAMI, 34(5), 1003\u20131016.","journal-title":"IEEE TPAMI"},{"key":"2082_CR16","doi-asserted-by":"crossref","unstructured":"Hu, G., Cui, B., He, Y., Yu, S. (2020) Progressive relation learning for group activity recognition. In: CVPR","DOI":"10.1109\/CVPR42600.2020.00106"},{"key":"2082_CR17","doi-asserted-by":"crossref","unstructured":"Ibrahim, M. S., Mori, G. (2018) Hierarchical relational networks for group activity recognition and retrieval. In: ECCV","DOI":"10.1007\/978-3-030-01219-9_44"},{"key":"2082_CR18","doi-asserted-by":"crossref","unstructured":"Ibrahim, M.S., Muralidharan, S., Deng, Z., Vahdat, A., Mori, G. (2016) A hierarchical deep temporal model for group activity recognition. In: CVPR","DOI":"10.1109\/CVPR.2016.217"},{"key":"2082_CR19","unstructured":"Kay, W., Carreira, J., Simonyan, K., Zhang, B., Hillier, C., Vijayanarasimhan, S., Viola, F., Green, T., Back, T., Natsev, P., Suleyman, M., Zisserman, A. (2017) The kinetics human action video dataset. ArXiv:1705.06950"},{"key":"2082_CR20","unstructured":"Kipf, T. N., Welling, M. (2017) Semi-supervised classification with graph convolutional networks. In: ICLR"},{"key":"2082_CR21","doi-asserted-by":"crossref","unstructured":"Kong, L., Qin, J., Huang, D., Wang, Y., Gool, L. V. (2018) Hierarchical attention and context modeling for group activity recognition. In: ICASSP","DOI":"10.1109\/ICASSP.2018.8461770"},{"key":"2082_CR22","doi-asserted-by":"crossref","unstructured":"Kuhn, H. W., Yaw, B. (1955) The hungarian method for the assignment problem. Naval Res. Logist. Quart pp. 83\u201397","DOI":"10.1002\/nav.3800020109"},{"key":"2082_CR23","unstructured":"Lan, T., Sigal, L., Mori, G. (2012) Social roles in hierarchical models for human activity recognition. In: CVPR"},{"issue":"8","key":"2082_CR24","doi-asserted-by":"publisher","first-page":"1549","DOI":"10.1109\/TPAMI.2011.228","volume":"34","author":"T Lan","year":"2012","unstructured":"Lan, T., Wang, Y., Yang, W., Robinovitch, S. N., & Mori, G. (2012). Discriminative latent models for recognizing contextual group activities. IEEE TPAMI, 34(8), 1549\u20131562.","journal-title":"IEEE TPAMI"},{"key":"2082_CR25","doi-asserted-by":"crossref","unstructured":"Li, S., Cao, Q., Liu, L., Yang, K., Liu, S., Hou, J., Yi, S. (2021) GroupFormer: Group activity recognition with clustered spatial-temporal transformer. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01341"},{"key":"2082_CR26","doi-asserted-by":"crossref","unstructured":"Li, X., Chuah, M. C. (2017) SBGAR: Semantics based group activity recognition. In: ICCV","DOI":"10.1109\/ICCV.2017.313"},{"key":"2082_CR27","doi-asserted-by":"crossref","unstructured":"Lin, T. Y., Goyal, P., Girshick, R., He, K., Doll\u00e1r, P. (2017) Focal loss for dense object detection. In: ICCV","DOI":"10.1109\/ICCV.2017.324"},{"key":"2082_CR28","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., Zitnick, C.L. (2014) Microsoft COCO: Common objects in context. In: ECCV","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"2082_CR29","unstructured":"Loshchilov, I., Hutter, F. (2019) Decoupled weight decay regularization. In: ICLR"},{"key":"2082_CR30","doi-asserted-by":"crossref","unstructured":"Park H, Shi J. (2015) Social saliency prediction. In: CVPR","DOI":"10.1109\/CVPR.2015.7299110"},{"key":"2082_CR31","doi-asserted-by":"crossref","unstructured":"Pramono, R. R. A., Chen, Y. T., Fang, W. H. (2020) Empowering relational network by self-attention augmented conditional random fields for group activity recognition. In: ECCV","DOI":"10.1007\/978-3-030-58452-8_5"},{"key":"2082_CR32","doi-asserted-by":"crossref","unstructured":"Qi, M., Qin, J., Li, A., Wang, Y., Luo, J., Gool, L. V. (2018) StagNet: An attentive semantic rnn for group activity recognition. In: ECCV","DOI":"10.1007\/978-3-030-01249-6_7"},{"key":"2082_CR33","doi-asserted-by":"crossref","unstructured":"Sendo, K., Ukita, N. (2019) Heatmapping of people involved in group activities. In: MVA","DOI":"10.23919\/MVA.2019.8757971"},{"key":"2082_CR34","doi-asserted-by":"crossref","unstructured":"Shu, T., Todorovic, S., Zhu, S. C. (2017) CERN: Confidence-energy recurrent network for group activity recognition. In: CVPR","DOI":"10.1109\/CVPR.2017.453"},{"key":"2082_CR35","doi-asserted-by":"crossref","unstructured":"Tamura, M., Vishwakarma, R., Vennelakanti, R. (2022) Hunting group clues with transformers for social group activity recognition. In: ECCV","DOI":"10.1007\/978-3-031-19772-7_2"},{"issue":"2","key":"2082_CR36","doi-asserted-by":"publisher","first-page":"636","DOI":"10.1109\/TPAMI.2019.2928540","volume":"44","author":"J Tang","year":"2022","unstructured":"Tang, J., Shu, X., Yan, R., & Zhang, L. (2022). Coherence constrained graph lstm for group activity recognition. IEEE TPAMI, 44(2), 636\u2013647.","journal-title":"IEEE TPAMI"},{"key":"2082_CR37","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, L., Polosukhin, I. (2017) Attention is all you need. In: NIPS"},{"key":"2082_CR38","unstructured":"Veli\u010dkovi\u010d, P., Cucurull, G., Casanova, A., Romero, A., Li\u00f2, P., Bengio, Y. (2018) Graph attention networks. In: ICLR"},{"key":"2082_CR39","doi-asserted-by":"crossref","unstructured":"Wang, M., Ni, B., Yang, X. (2017) Recurrent modeling of interaction context for collective activity recognition. In: CVPR","DOI":"10.1109\/CVPR.2017.783"},{"key":"2082_CR40","doi-asserted-by":"crossref","unstructured":"Wang, Z., Shi, Q., Shen, C., van\u00a0den Hengel, A. (2013) Bilinear programming for human activity recognition with unknown mrf graphs. In: CVPR","DOI":"10.1109\/CVPR.2013.221"},{"key":"2082_CR41","doi-asserted-by":"crossref","unstructured":"Wu, J., Wang, L., Wang, L., Guo, J., Wu, G. (2019) Learning actor relation graphs for group activity recognition. In: CVPR","DOI":"10.1109\/CVPR.2019.01020"},{"key":"2082_CR42","doi-asserted-by":"crossref","unstructured":"Yan, R., Shu, X., Yuan, C., Tian, Q., & Tang, J. (2022). Position-aware participation-contributed temporal dynamic model for group activity recognition. IEEE TNNLS, 33(12), 7574\u20137588.","DOI":"10.1109\/TNNLS.2021.3085567"},{"key":"2082_CR43","unstructured":"Yan, R., Xie, L., Tang, J., Shu, X., Tian, Q. (2020) HiGCIN: Hierarchical graph-based cross inference network for group activity recognition. IEEE TPAMI"},{"key":"2082_CR44","doi-asserted-by":"crossref","unstructured":"Yan, R., Xie, L., Tang, J., Shu, X., Tian, Q. (2020) Social adaptive module for weakly-supervised group activity recognition. In: ECCV","DOI":"10.1007\/978-3-030-58598-3_13"},{"key":"2082_CR45","doi-asserted-by":"crossref","unstructured":"Yuan, H., Ni, D., Wang, M. (2021) Spatio-temporal dynamic inference network for group activity recognition. In: ICCV","DOI":"10.1109\/ICCV48922.2021.00738"},{"key":"2082_CR46","doi-asserted-by":"crossref","unstructured":"Zhou, H., Kadav, A., Shamsian, A., Geng, S., Lai, F., Zhao, L., Liu, T., Kapadia, M., Graf, H. P. (2021) COMPOSER: Compositional learning of group activity in videos. arXiv preprint arXiv:2112.05892","DOI":"10.1007\/978-3-031-19833-5_15"},{"key":"2082_CR47","unstructured":"Zhou, X., Wang, D., Kr\u00e4henb\u00fchl, P. (2019) Objects as points. ArXiv:1904.07850"},{"key":"2082_CR48","unstructured":"Zhu, X., Su, W., Lu, L., Li, B., Wang, X., Dai, J. (2021) Deformable DETR: Deformable transformers for end-to-end object detection. In: ICLR"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-024-02082-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-024-02082-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-024-02082-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,4]],"date-time":"2024-10-04T06:15:43Z","timestamp":1728022543000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-024-02082-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,8]]},"references-count":48,"journal-issue":{"issue":"10","published-print":{"date-parts":[[2024,10]]}},"alternative-id":["2082"],"URL":"https:\/\/doi.org\/10.1007\/s11263-024-02082-y","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,5,8]]},"assertion":[{"value":"14 February 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 April 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 May 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}