{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T15:34:52Z","timestamp":1772120092162,"version":"3.50.1"},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"5-6","license":[{"start":{"date-parts":[[2024,11,15]],"date-time":"2024-11-15T00:00:00Z","timestamp":1731628800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,15]],"date-time":"2024-11-15T00:00:00Z","timestamp":1731628800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["51405448"],"award-info":[{"award-number":["51405448"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int. J. Mach. Learn. &amp; Cyber."],"published-print":{"date-parts":[[2025,6]]},"DOI":"10.1007\/s13042-024-02454-3","type":"journal-article","created":{"date-parts":[[2024,11,15]],"date-time":"2024-11-15T10:40:44Z","timestamp":1731667244000},"page":"3345-3358","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["MCANet: a lightweight action recognition network with multidimensional convolution and attention"],"prefix":"10.1007","volume":"16","author":[{"given":"Qiuhong","family":"Tian","sequence":"first","affiliation":[]},{"given":"Weilun","family":"Miao","sequence":"additional","affiliation":[]},{"given":"Lizao","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Ziyu","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Yang","family":"Yu","sequence":"additional","affiliation":[]},{"given":"Yanying","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Lan","family":"Yao","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,15]]},"reference":[{"issue":"5","key":"2454_CR1","doi-asserted-by":"publisher","first-page":"14885","DOI":"10.1007\/s11042-020-08806-9","volume":"83","author":"MA Khan","year":"2024","unstructured":"Khan MA, Javed K, Khan SA, Saba T, Habib U, Khan JA, Abbasi AA (2024) Human action recognition using fusion of multiview and deep features: an application to video surveillance. Multimed Tools Appl 83(5):14885\u201314911","journal-title":"Multimed Tools Appl"},{"issue":"2","key":"2454_CR2","doi-asserted-by":"publisher","first-page":"15","DOI":"10.3390\/technologies12020015","volume":"12","author":"N Manakitsa","year":"2024","unstructured":"Manakitsa N, Maraslidis GS, Moysis L, Fragulis GF (2024) A review of machine learning and deep learning for object detection, semantic segmentation, and human action recognition in machine and robotic vision. Technologies 12(2):15","journal-title":"Technologies"},{"key":"2454_CR3","doi-asserted-by":"crossref","unstructured":"Zheng Y, Huang H, Wang X, Yan X (2024) Spatio-temporal fusion for human action recognition via joint trajectory graph. In: Proceedings of the AAAI Conference on Artificial Intelligence 38:7579\u20137587","DOI":"10.1609\/aaai.v38i7.28590"},{"issue":"3","key":"2454_CR4","doi-asserted-by":"publisher","first-page":"2259","DOI":"10.1007\/s10462-020-09904-8","volume":"54","author":"P Pareek","year":"2021","unstructured":"Pareek P, Thakkar A (2021) A survey on video-based human action recognition: recent updates, datasets, challenges, and applications. Artif Intell Rev 54(3):2259\u20132322","journal-title":"Artif Intell Rev"},{"key":"2454_CR5","doi-asserted-by":"crossref","unstructured":"Nguyen DA, Le-Khac NA (2024) Sok: Behind the accuracy of complex human activity recognition using deep learning. arXiv preprint arXiv:2405.00712","DOI":"10.1109\/IJCNN60899.2024.10650322"},{"issue":"11","key":"2454_CR6","doi-asserted-by":"publisher","first-page":"2990","DOI":"10.1109\/TMM.2020.2965434","volume":"22","author":"J Li","year":"2020","unstructured":"Li J, Liu X, Zhang W, Zhang M, Song J, Sebe N (2020) Spatio-temporal attention networks for action recognition and detection. IEEE Trans Multimed 22(11):2990\u20133001","journal-title":"IEEE Trans Multimed"},{"issue":"1","key":"2454_CR7","doi-asserted-by":"publisher","first-page":"29","DOI":"10.3390\/electronics12010029","volume":"12","author":"SA Jebur","year":"2022","unstructured":"Jebur SA, Hussein KA, Hoomod HK, Alzubaidi L, Santamar\u00eda J (2022) Review on deep learning approaches for anomaly event detection in video surveillance. Electronics 12(1):29","journal-title":"Electronics"},{"key":"2454_CR8","doi-asserted-by":"crossref","unstructured":"Kozlov A, Andronov V, Gritsenko Y (2020) Lightweight network architecture for real-time action recognition. In: Proceedings of the 35th Annual ACM Symposium on Applied Computing, pp. 2074\u20132080","DOI":"10.1145\/3341105.3373906"},{"key":"2454_CR9","doi-asserted-by":"crossref","unstructured":"Arnab A, Dehghani M, Heigold G, Sun C, Lu\u010di\u0107 M, Schmid, C (2021) Vivit: A video vision transformer. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6836\u20136846","DOI":"10.1109\/ICCV48922.2021.00676"},{"key":"2454_CR10","doi-asserted-by":"crossref","unstructured":"Liu Z, Ning J, Cao Y, Wei Y, Zhang Z, Lin S, Hu H (2022) Video swin transformer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3202\u20133211","DOI":"10.1109\/CVPR52688.2022.00320"},{"key":"2454_CR11","doi-asserted-by":"crossref","unstructured":"Feichtenhofer C (2020) X3d: Expanding architectures for efficient video recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 203\u2013213","DOI":"10.1109\/CVPR42600.2020.00028"},{"key":"2454_CR12","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Zhai X, Unterthiner T, Dehghani M, Minderer M, Heigold G, Gelly S, et al (2020) An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929"},{"key":"2454_CR13","unstructured":"Bertasius G, Wang H, Torresani L (2021) Is space-time attention all you need for video understanding? In: ICML, vol. 2, p. 4"},{"key":"2454_CR14","doi-asserted-by":"crossref","unstructured":"Fan H, Xiong B, Mangalam K, Li Y, Yan Z, Malik J, Feichtenhofer C (2021) Multiscale vision transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6824\u20136835","DOI":"10.1109\/ICCV48922.2021.00675"},{"key":"2454_CR15","doi-asserted-by":"crossref","unstructured":"Kong Z, Dong P, Ma X, Meng X, Niu W, Sun M, Shen X, Yuan G, Ren B, Tang H et\u00a0al. (2022) Spvit: Enabling faster vision transformers via latency-aware soft token pruning. In: European Conference on Computer Vision, pp. 620\u2013640 . Springer","DOI":"10.1007\/978-3-031-20083-0_37"},{"key":"2454_CR16","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2024.106235","volume":"174","author":"Y Wang","year":"2024","unstructured":"Wang Y, Du B, Wang W, Xu C (2024) Multi-tailed vision transformer for efficient inference. Neural Netw 174:106235","journal-title":"Neural Netw"},{"key":"2454_CR17","doi-asserted-by":"crossref","unstructured":"Qiu Z, Yao T, Mei T (2017) Learning spatio-temporal representation with pseudo-3d residual networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 5533\u20135541","DOI":"10.1109\/ICCV.2017.590"},{"key":"2454_CR18","unstructured":"Howard AG, Zhu M, Chen B, Kalenichenko D, Wang W, Weyand T, Andreetto M, Adam H (2017) Mobilenets: Efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861"},{"key":"2454_CR19","unstructured":"Tan M, Le Q (2019) Efficientnet: Rethinking model scaling for convolutional neural networks. In: International Conference on Machine Learning, pp. 6105\u20136114 . PMLR"},{"key":"2454_CR20","doi-asserted-by":"publisher","first-page":"58","DOI":"10.1155\/2022\/8323962","volume":"2022","author":"V-T Le","year":"2022","unstructured":"Le V-T, Tran-Trung K, Hoang VT (2022) A comprehensive review of recent deep learning techniques for human activity recognition. Comput Intell Neurosci 2022:58","journal-title":"Comput Intell Neurosci"},{"issue":"1","key":"2454_CR21","doi-asserted-by":"publisher","first-page":"323","DOI":"10.3390\/s22010323","volume":"22","author":"IU Khan","year":"2022","unstructured":"Khan IU, Afzal S, Lee JW (2022) Human activity recognition via hybrid deep learning based model. Sensors 22(1):323","journal-title":"Sensors"},{"key":"2454_CR22","unstructured":"Mehta S, Rastegari M (2021) Mobilevit: light-weight, general-purpose, and mobile-friendly vision transformer. arXiv preprint arXiv:2110.02178"},{"key":"2454_CR23","doi-asserted-by":"crossref","unstructured":"Cao Y, Xu J, Lin S, Wei F, Hu H (2019) Gcnet: Non-local networks meet squeeze-excitation networks and beyond. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision Workshops, pp. 0\u20130","DOI":"10.1109\/ICCVW.2019.00246"},{"key":"2454_CR24","doi-asserted-by":"crossref","unstructured":"Chen H, Chu X, Ren Y, Zhao X, Huang K (2024) Pelk: Parameter-efficient large kernel convnets with peripheral convolution. arXiv preprint arXiv:2403.07589","DOI":"10.1109\/CVPR52733.2024.00531"},{"key":"2454_CR25","unstructured":"Yu F, Koltun V (2015) Multi-scale context aggregation by dilated convolutions. arXiv preprint arXiv:1511.07122"},{"key":"2454_CR26","doi-asserted-by":"crossref","unstructured":"Hu J, Shen L, Sun G (2018) Squeeze-and-excitation networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7132\u20137141","DOI":"10.1109\/CVPR.2018.00745"},{"key":"2454_CR27","doi-asserted-by":"crossref","unstructured":"Kuehne H, Jhuang H, Garrote E, Poggio T, Serre T (2011) Hmdb: a large video database for human motion recognition. In: 2011 International Conference on Computer Vision, pp. 2556\u20132563 . IEEE","DOI":"10.1109\/ICCV.2011.6126543"},{"key":"2454_CR28","unstructured":"Soomro K, Zamir AR, Shah M (2012) Ucf101: A dataset of 101 human actions classes from videos in the wild. arXiv preprint arXiv:1212.0402"},{"key":"2454_CR29","unstructured":"Kay W, Carreira J, Simonyan K, Zhang B, Hillier C, Vijayanarasimhan S, Viola F, Green T, Back T, Natsev P, et al. (2017) The kinetics human action video dataset. arXiv preprint arXiv:1705.06950"},{"key":"2454_CR30","unstructured":"Tran D, Ray J, Shou Z, Chang S-F, Paluri M (2017) Convnet architecture search for spatiotemporal feature learning. arXiv preprint arXiv:1708.05038"},{"key":"2454_CR31","doi-asserted-by":"crossref","unstructured":"Feichtenhofer C, Fan H, Malik J, He K (2019) Slowfast networks for video recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6202\u20136211","DOI":"10.1109\/ICCV.2019.00630"},{"key":"2454_CR32","doi-asserted-by":"crossref","unstructured":"Carreira J, Zisserman A (2017) Quo vadis, action recognition? a new model and the kinetics dataset. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6299\u20136308","DOI":"10.1109\/CVPR.2017.502"},{"key":"2454_CR33","unstructured":"Xie S, Sun C, Huang J, Tu Z, Murphy K (2017) Rethinking spatiotemporal feature learning for video understanding. arXiv preprint arXiv:1712.04851 1(2), 5"},{"key":"2454_CR34","doi-asserted-by":"crossref","unstructured":"Chen J, Kao S-h, He H, Zhuo W, Wen S, Lee C-H, Chan S-HG (2023) Run, don\u2019t walk: Chasing higher flops for faster neural networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12021\u201312031","DOI":"10.1109\/CVPR52729.2023.01157"},{"key":"2454_CR35","doi-asserted-by":"crossref","unstructured":"Lin J, Gan C, Han S (2019) Tsm: Temporal shift module for efficient video understanding. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7083\u20137093","DOI":"10.1109\/ICCV.2019.00718"},{"key":"2454_CR36","doi-asserted-by":"crossref","unstructured":"Wang X, Girshick R, Gupta A, He K (2018) Non-local neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7794\u20137803","DOI":"10.1109\/CVPR.2018.00813"},{"key":"2454_CR37","unstructured":"Huang Z, Zhang S, Pan L, Qing Z, Tang M, Liu Z, Ang\u00a0Jr MH (2021) Tada! temporally-adaptive convolutions for video understanding. arXiv preprint arXiv:2110.06178"},{"key":"2454_CR38","doi-asserted-by":"crossref","unstructured":"Tran D, Wang H, Torresani L, Ray J, LeCun Y, Paluri M (2018) A closer look at spatiotemporal convolutions for action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6450\u20136459","DOI":"10.1109\/CVPR.2018.00675"},{"key":"2454_CR39","doi-asserted-by":"crossref","unstructured":"Hou Q, Zhou D, Feng J (2021) Coordinate attention for efficient mobile network design. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13713\u201313722","DOI":"10.1109\/CVPR46437.2021.01350"}],"container-title":["International Journal of Machine Learning and Cybernetics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-024-02454-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s13042-024-02454-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-024-02454-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,7]],"date-time":"2025-06-07T00:32:51Z","timestamp":1749256371000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s13042-024-02454-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,15]]},"references-count":39,"journal-issue":{"issue":"5-6","published-print":{"date-parts":[[2025,6]]}},"alternative-id":["2454"],"URL":"https:\/\/doi.org\/10.1007\/s13042-024-02454-3","relation":{"has-preprint":[{"id-type":"doi","id":"10.21203\/rs.3.rs-4596829\/v1","asserted-by":"object"}]},"ISSN":["1868-8071","1868-808X"],"issn-type":[{"value":"1868-8071","type":"print"},{"value":"1868-808X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,15]]},"assertion":[{"value":"18 June 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"31 October 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 November 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no Conflict of interest to declare that are relevant to the content of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}