{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T22:34:03Z","timestamp":1742942043077,"version":"3.40.3"},"publisher-location":"Cham","reference-count":29,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031463075"},{"type":"electronic","value":"9783031463082"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-46308-2_32","type":"book-chapter","created":{"date-parts":[[2023,10,29]],"date-time":"2023-10-29T18:01:24Z","timestamp":1698602484000},"page":"389-400","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["SSTA-Net: Self-supervised Spatio-Temporal Attention Network for\u00a0Action Recognition"],"prefix":"10.1007","author":[{"given":"Yihan","family":"Li","sequence":"first","affiliation":[]},{"given":"Wenwen","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Zhao","family":"Pei","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,10,30]]},"reference":[{"doi-asserted-by":"crossref","unstructured":"Beye, F., Shinohara, Y., Itsumi, H., Nihei, K.: Recognition-aware bitrate allocation for ai-enabled remote video surveillance. In: Proceedings of the IEEE 20th Consumer Communications & Networking Conference, pp. 158\u2013163 (2023)","key":"32_CR1","DOI":"10.1109\/CCNC51644.2023.10059877"},{"key":"32_CR2","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.109453","volume":"139","author":"S Gupta","year":"2023","unstructured":"Gupta, S., Maple, C., Crispo, B., Raja, K., Yautsiukhin, A., Martinelli, F.: A survey of human-computer interaction (hci) & natural habits-based behavioural biometric modalities for user recognition schemes. Pattern Recogn. 139, 109453 (2023)","journal-title":"Pattern Recogn."},{"key":"32_CR3","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1016\/j.imavis.2017.01.010","volume":"60","author":"S Herath","year":"2017","unstructured":"Herath, S., Harandi, M., Porikli, F.: Going deeper into action recognition: a survey. Image Vis. Comput. 60, 4\u201321 (2017)","journal-title":"Image Vis. Comput."},{"doi-asserted-by":"crossref","unstructured":"Suh, S., Rey, V.F., Lukowicz, P.: Adversarial deep feature extraction network for user independent human activity recognition. In: Proceedings of the IEEE International Conference on Pervasive Computing and Communications, pp. 217\u2013226 (2022)","key":"32_CR4","DOI":"10.1109\/PerCom53586.2022.9762387"},{"key":"32_CR5","doi-asserted-by":"publisher","first-page":"112257","DOI":"10.1109\/ACCESS.2021.3103211","volume":"9","author":"YM Hwang","year":"2021","unstructured":"Hwang, Y.M., Park, S., Lee, H.O., Ko, S.K., Lee, B.T.: Deep learning for human activity recognition based on causality feature extraction. IEEE Access 9, 112257\u2013112275 (2021)","journal-title":"IEEE Access"},{"unstructured":"Bertasius, G., Wang, H., Torresani, L.: Is space-time attention all you need for video understanding? In: Proceedings of the 38th International Conference on Machine Learning, pp. 813\u2013824 (2021)","key":"32_CR6"},{"doi-asserted-by":"crossref","unstructured":"Hou, Y., Ma, Z., Liu, C., Loy, C.C.: Learning lightweight lane detection cnns by self attention distillation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1013\u20131021 (2019)","key":"32_CR7","DOI":"10.1109\/ICCV.2019.00110"},{"doi-asserted-by":"crossref","unstructured":"Caron, M., Touvron, H., Misra, I., J\u00e9gou, H., Mairal, J., Bojanowski, P., Joulin, A.: Emerging properties in self-supervised vision transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9650\u20139660 (2021)","key":"32_CR8","DOI":"10.1109\/ICCV48922.2021.00951"},{"doi-asserted-by":"crossref","unstructured":"He, K., Fan, H., Wu, Y., Xie, S., Girshick, R.: Momentum contrast for unsupervised visual representation learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9729\u20139738 (2020)","key":"32_CR9","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"32_CR10","first-page":"5679","volume":"33","author":"T Han","year":"2020","unstructured":"Han, T., Xie, W., Zisserman, A.: Self-supervised co-training for video representation learning. Adv. Neural. Inf. Process. Syst. 33, 5679\u20135690 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"doi-asserted-by":"crossref","unstructured":"Qian, R., Meng, T., Gong, B., Yang, M.H., Wang, H., Belongie, S., Cui, Y.: Spatiotemporal contrastive video representation learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6964\u20136974 (2021)","key":"32_CR11","DOI":"10.1109\/CVPR46437.2021.00689"},{"doi-asserted-by":"crossref","unstructured":"Wang, J., Jiao, J., Liu, Y.H.: Self-supervised video representation learning by pace prediction. In: Proceedings of the European Conference on Computer Vision, pp. 504\u2013521 (2020)","key":"32_CR12","DOI":"10.1007\/978-3-030-58520-4_30"},{"doi-asserted-by":"crossref","unstructured":"Luo, D., Liu, C., Zhou, Y., Yang, D., Ma, C., Ye, Q., Wang, W.: Video cloze procedure for self-supervised spatio-temporal learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 11701\u201311708 (2020)","key":"32_CR13","DOI":"10.1609\/aaai.v34i07.6840"},{"doi-asserted-by":"crossref","unstructured":"Arnab, A., Dehghani, M., Heigold, G., Sun, C., Lu\u010di\u0107, M., Schmid, C.: Vivit: a video vision transformer. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6836\u20136846 (2021)","key":"32_CR14","DOI":"10.1109\/ICCV48922.2021.00676"},{"doi-asserted-by":"crossref","unstructured":"Wang, Z., Fang, Z., Wang, J., Yang, Y.: Vitaa: visual-textual attributes alignment in person search by natural language. In: Proceedings of the European Conference on Computer Vision, pp. 402\u2013420 (2020)","key":"32_CR15","DOI":"10.1007\/978-3-030-58610-2_24"},{"doi-asserted-by":"crossref","unstructured":"Fan, H., Xiong, B., Mangalam, K., Li, Y., Yan, Z., Malik, J., Feichtenhofer, C.: Multiscale vision transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6824\u20136835 (2021)","key":"32_CR16","DOI":"10.1109\/ICCV48922.2021.00675"},{"unstructured":"Hinton, G., Vinyals, O., Dean, J.: Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531 (2015)","key":"32_CR17"},{"key":"32_CR18","first-page":"21271","volume":"33","author":"JB Grill","year":"2020","unstructured":"Grill, J.B., et al.: Bootstrap your own latent-a new approach to self-supervised learning. Adv. Neural. Inf. Process. Syst. 33, 21271\u201321284 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? a new model and the kinetics dataset. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6299\u20136308 (2017)","key":"32_CR19","DOI":"10.1109\/CVPR.2017.502"},{"unstructured":"Soomro, K., Zamir, A.R., Shah, M.: Ucf101: A dataset of 101 human actions classes from videos in the wild. arXiv preprint arXiv:1212.0402 (2012)","key":"32_CR20"},{"doi-asserted-by":"crossref","unstructured":"Tran, D., Wang, H., Torresani, L., Ray, J., LeCun, Y., Paluri, M.: A closer look at spatiotemporal convolutions for action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6450\u20136459 (2018)","key":"32_CR21","DOI":"10.1109\/CVPR.2018.00675"},{"doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Fan, H., Malik, J., He, K.: Slowfast networks for video recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6202\u20136211 (2019)","key":"32_CR22","DOI":"10.1109\/ICCV.2019.00630"},{"key":"32_CR23","doi-asserted-by":"publisher","first-page":"368","DOI":"10.3390\/e24030368","volume":"24","author":"Q Yang","year":"2022","unstructured":"Yang, Q., Lu, T., Zhou, H.: A spatio-temporal motion network for action recognition based on spatial attention. Entropy 24, 368 (2022)","journal-title":"Entropy"},{"doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C.: X3d: Expanding architectures for efficient video recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 203\u2013213 (2020)","key":"32_CR24","DOI":"10.1109\/CVPR42600.2020.00028"},{"unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. In: Proceedings of the 9th International Conference on Learning Representations, pp. 132\u2013141 (2021)","key":"32_CR25"},{"key":"32_CR26","doi-asserted-by":"publisher","first-page":"5266","DOI":"10.1109\/TCSVT.2022.3141051","volume":"32","author":"L Tao","year":"2022","unstructured":"Tao, L., Wang, X., Yamasaki, T.: An improved inter-intra contrastive learning framework on self-supervised video representation. IEEE Trans. Circuits Syst. Video Technol. 32, 5266\u20135280 (2022)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"32_CR27","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2022.103406","volume":"219","author":"I Dave","year":"2022","unstructured":"Dave, I., Gupta, R., Rizve, M.N., Shah, M.: Tclr: temporal contrastive learning for video representation. Comput. Vis. Image Underst. 219, 103406 (2022)","journal-title":"Comput. Vis. Image Underst."},{"doi-asserted-by":"crossref","unstructured":"Khorasgani, S.H., Chen, Y., Shkurti, F.: Slic: Self-supervised learning with iterative clustering for human action videos. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition,. pp. 16091\u201316101 (2022)","key":"32_CR28","DOI":"10.1109\/CVPR52688.2022.01562"},{"doi-asserted-by":"crossref","unstructured":"Ranasinghe, K., Naseer, M., Khan, S., Khan, F.S., Ryoo, M.S.: Self-supervised video transformer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2874\u20132884 (2022)","key":"32_CR29","DOI":"10.1109\/CVPR52688.2022.00289"}],"container-title":["Lecture Notes in Computer Science","Image and Graphics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-46308-2_32","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,29]],"date-time":"2023-10-29T18:05:32Z","timestamp":1698602732000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-46308-2_32"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031463075","9783031463082"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-46308-2_32","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"30 October 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICIG","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Image and Graphics","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Nanjing","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 September 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icig2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/icig2023.csig.org.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Conference Management Toolkit","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"409","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"166","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"41% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}