{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,9]],"date-time":"2026-04-09T10:57:15Z","timestamp":1775732235177,"version":"3.50.1"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2024,8,19]],"date-time":"2024-08-19T00:00:00Z","timestamp":1724025600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2024,8,19]],"date-time":"2024-08-19T00:00:00Z","timestamp":1724025600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61873274"],"award-info":[{"award-number":["61873274"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Complex Intell. Syst."],"published-print":{"date-parts":[[2024,12]]},"DOI":"10.1007\/s40747-024-01571-4","type":"journal-article","created":{"date-parts":[[2024,8,19]],"date-time":"2024-08-19T07:04:31Z","timestamp":1724051071000},"page":"8249-8272","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Hybrid attentive prototypical network for few-shot action recognition"],"prefix":"10.1007","volume":"10","author":[{"given":"Zanxi","family":"Ruan","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4568-551X","authenticated-orcid":false,"given":"Yingmei","family":"Wei","sequence":"additional","affiliation":[]},{"given":"Yanming","family":"Guo","sequence":"additional","affiliation":[]},{"given":"Yuxiang","family":"Xie","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,8,19]]},"reference":[{"issue":"10","key":"1571_CR1","doi-asserted-by":"publisher","first-page":"983","DOI":"10.1007\/s00371-012-0752-6","volume":"29","author":"S Vishwakarma","year":"2013","unstructured":"Vishwakarma S, Agrawal A (2013) A survey on activity recognition and behavior understanding in video surveillance. Vis Comput 29(10):983\u20131009","journal-title":"Vis Comput"},{"key":"1571_CR2","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1016\/j.patrec.2018.05.018","volume":"118","author":"G Yao","year":"2019","unstructured":"Yao G, Lei T, Zhong J (2019) A review of convolutional-neural-network-based action recognition. Pattern Recogn Lett 118:14\u201322","journal-title":"Pattern Recogn Lett"},{"key":"1571_CR3","doi-asserted-by":"publisher","unstructured":"Bilal H, Yao W, Guo Y, Wu Y, Guo J (2017) Experimental validation of fuzzy PID control of flexible joint system in presence of uncertainties. In: 2017 36th Chinese control conference (CCC), pp 4192\u20134197. https:\/\/doi.org\/10.23919\/ChiCC.2017.8028015","DOI":"10.23919\/ChiCC.2017.8028015"},{"issue":"3","key":"1571_CR4","doi-asserted-by":"publisher","first-page":"2248","DOI":"10.1109\/LRA.2024.3355752","volume":"9","author":"Z Liu","year":"2024","unstructured":"Liu Z, Lu X, Liu W, Qi W, Su H (2024) Human-robot collaboration through a multi-scale graph convolution neural network with temporal attention. IEEE Robot Autom Lett 9(3):2248\u20132255. 
https:\/\/doi.org\/10.1109\/LRA.2024.3355752","journal-title":"IEEE Robot Autom Lett"},{"issue":"8","key":"1571_CR5","doi-asserted-by":"publisher","first-page":"4987","DOI":"10.1007\/s00500-023-08026-x","volume":"27","author":"H Bilal","year":"2023","unstructured":"Bilal H, Yin B, Aslam MS, Anjum Z, Rohra A, Wang Y (2023) A practical study of active disturbance rejection control for rotary flexible joint robot manipulator. Soft Comput 27(8):4987\u20135001","journal-title":"Soft Comput"},{"issue":"7","key":"1571_CR6","doi-asserted-by":"publisher","first-page":"4029","DOI":"10.1007\/s00500-023-07923-5","volume":"27","author":"H Bilal","year":"2023","unstructured":"Bilal H, Yin B, Kumar A, Ali M, Zhang J, Yao J (2023) Jerk-bounded trajectory planning for rotary flexible joint manipulator: an experimental approach. Soft Comput 27(7):4029\u20134039","journal-title":"Soft Comput"},{"issue":"10","key":"1571_CR7","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3561971","volume":"55","author":"FUM Ullah","year":"2023","unstructured":"Ullah FUM, Obaidat MS, Ullah A, Muhammad K, Hijji M, Baik SW (2023) A comprehensive review on vision-based violence detection in surveillance videos. ACM Comput Surv 55(10):1\u201344","journal-title":"ACM Comput Surv"},{"issue":"23","key":"1571_CR8","doi-asserted-by":"publisher","first-page":"18195","DOI":"10.1007\/s00500-023-09278-3","volume":"27","author":"Q Wu","year":"2023","unstructured":"Wu Q, Li X, Wang K, Bilal H (2023) Regional feature fusion for on-road detection of objects using camera and 3D-lidar in high-speed autonomous vehicles. Soft Comput 27(23):18195\u201318213","journal-title":"Soft Comput"},{"issue":"21","key":"1571_CR9","doi-asserted-by":"publisher","first-page":"16373","DOI":"10.1007\/s00500-023-09164-y","volume":"27","author":"H Dou","year":"2023","unstructured":"Dou H, Liu Y, Chen S, Zhao H, Bilal H (2023) A hybrid CEEMD-GMM scheme for enhancing the detection of traffic flow on highways. Soft Comput 27(21):16373\u201316388","journal-title":"Soft Comput"},{"key":"1571_CR10","doi-asserted-by":"crossref","unstructured":"Caba\u00a0Heilbron F, Escorcia V, Ghanem B, Carlos\u00a0Niebles J (2015) Activitynet: a large-scale video benchmark for human activity understanding. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 961\u2013970","DOI":"10.1109\/CVPR.2015.7298698"},{"key":"1571_CR11","doi-asserted-by":"crossref","unstructured":"Carreira J, Zisserman A (2017) Quo vadis, action recognition? A new model and the kinetics dataset. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 6299\u20136308","DOI":"10.1109\/CVPR.2017.502"},{"key":"1571_CR12","unstructured":"Bertasius G, Wang H, Torresani L (2021) Is space-time attention all you need for video understanding? In: ICML, vol 2, p 4"},{"key":"1571_CR13","first-page":"1","volume":"27","author":"K Simonyan","year":"2014","unstructured":"Simonyan K, Zisserman A (2014) Two-stream convolutional networks for action recognition in videos. Adv Neural Inform Process Syst 27:1","journal-title":"Adv Neural Inform Process Syst"},{"key":"1571_CR14","doi-asserted-by":"crossref","unstructured":"Tran D, Bourdev L, Fergus R, Torresani L, Paluri M (2015) Learning spatiotemporal features with 3D convolutional networks. 
In: Proceedings of the IEEE international conference on computer vision, pp 4489\u20134497","DOI":"10.1109\/ICCV.2015.510"},{"key":"1571_CR15","doi-asserted-by":"crossref","unstructured":"Fu Y, Zhang L, Wang J, Fu Y, Jiang YG (2020) Depth guided adaptive meta-fusion network for few-shot video recognition. In: Proceedings of the 28th ACM international conference on multimedia, pp 1142\u20131151","DOI":"10.1145\/3394171.3413502"},{"key":"1571_CR16","doi-asserted-by":"crossref","unstructured":"Kumar\u00a0Dwivedi S, Gupta V, Mitra R, Ahmed S, Jain A (2019) Protogan: towards few shot learning for action recognition. In: Proceedings of the IEEE\/CVF international conference on computer vision workshops","DOI":"10.1109\/ICCVW.2019.00166"},{"key":"1571_CR17","doi-asserted-by":"crossref","unstructured":"Wang X, Zhang S, Qing Z, Tang M, Zuo Z, Gao C, Jin R, Sang N (2022) Hybrid relation guided set matching for few-shot action recognition. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 19948\u201319957","DOI":"10.1109\/CVPR52688.2022.01932"},{"key":"1571_CR18","unstructured":"Zhu X, Toisoul A, Perez-Rua J-M, Zhang L, Martinez B, Xiang T (2021) Few-shot action recognition with prototype-centered attentive learning. Preprint arXiv:2101.08085"},{"key":"1571_CR19","first-page":"1","volume":"30","author":"J Snell","year":"2017","unstructured":"Snell J, Swersky K, Zemel R (2017) Prototypical networks for few-shot learning. Adv Neural Inform Process Syst 30:1","journal-title":"Adv Neural Inform Process Syst"},{"key":"1571_CR20","first-page":"1","volume":"29","author":"O Vinyals","year":"2016","unstructured":"Vinyals O, Blundell C, Lillicrap T, Wierstra D et al (2016) Matching networks for one shot learning. Adv Neural Inform Process Syst 29:1","journal-title":"Adv Neural Inform Process Syst"},{"key":"1571_CR21","doi-asserted-by":"crossref","unstructured":"Thatipelli A, Narayan S, Khan S, Anwer RM, Khan FS, Ghanem B (2022) Spatio-temporal relation modeling for few-shot action recognition. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 19958\u201319967","DOI":"10.1109\/CVPR52688.2022.01933"},{"key":"1571_CR22","doi-asserted-by":"crossref","unstructured":"Perrett T, Masullo A, Burghardt T, Mirmehdi M, Damen D (2021) Temporal\u2013relational cross transformers for few-shot action recognition. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 475\u2013484","DOI":"10.1109\/CVPR46437.2021.00054"},{"key":"1571_CR23","doi-asserted-by":"crossref","unstructured":"Cao K, Ji J, Cao Z, Chang C-Y, Niebles JC (2020) Few-shot video classification via temporal alignment. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 10618\u201310627","DOI":"10.1109\/CVPR42600.2020.01063"},{"key":"1571_CR24","unstructured":"Kumar N, Narang S (2021) Few shot activity recognition using variational inference. Preprint arXiv:2108.08990"},{"key":"1571_CR25","doi-asserted-by":"crossref","unstructured":"Kuehne H, Jhuang H, Garrote E, Poggio T, Serre T (2011) HMDB: a large video database for human motion recognition. In: International conference on computer vision. IEEE, pp 2556\u20132563","DOI":"10.1109\/ICCV.2011.6126543"},{"key":"1571_CR26","unstructured":"Soomro K, Zamir AR, Shah M (2012) Ucf101: a dataset of 101 human actions classes from videos in the wild. 
Preprint arXiv:1212.0402"},{"key":"1571_CR27","doi-asserted-by":"crossref","unstructured":"Feichtenhofer C (2020) X3d: expanding architectures for efficient video recognition. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 203\u2013213","DOI":"10.1109\/CVPR42600.2020.00028"},{"key":"1571_CR28","doi-asserted-by":"crossref","unstructured":"Tran D, Wang H, Torresani L, Ray J, LeCun Y, Paluri M (2018) A closer look at spatiotemporal convolutions for action recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 6450\u20136459","DOI":"10.1109\/CVPR.2018.00675"},{"key":"1571_CR29","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3431234","volume":"2022","author":"Y Song","year":"2022","unstructured":"Song Y, Wang T, Cai P, Mondal SK, Sahoo JP (2022) A comprehensive survey of few-shot learning: evolution, applications, challenges, and opportunities. ACM Comput Surv 2022:1","journal-title":"ACM Comput Surv"},{"issue":"1","key":"1571_CR30","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s13007-022-00866-2","volume":"18","author":"J Yang","year":"2022","unstructured":"Yang J, Guo X, Li Y, Marinello F, Ercisli S, Zhang Z (2022) A survey of few-shot learning in smart agriculture: developments, applications, and challenges. Plant Methods 18(1):1\u201312","journal-title":"Plant Methods"},{"key":"1571_CR31","doi-asserted-by":"crossref","unstructured":"Sung F, Yang Y, Zhang L, Xiang T, Torr PH, Hospedales TM (2018) Learning to compare: relation network for few-shot learning. In: Proceedings of the IEEE conference on computer vision and pattern recognition, vol 1, pp 199\u20131208","DOI":"10.1109\/CVPR.2018.00131"},{"issue":"6","key":"1571_CR32","doi-asserted-by":"publisher","first-page":"976","DOI":"10.1016\/j.imavis.2009.11.014","volume":"28","author":"R Poppe","year":"2010","unstructured":"Poppe R (2010) A survey on vision-based human action recognition. Image Vis Comput 28(6):976\u2013990","journal-title":"Image Vis Comput"},{"key":"1571_CR33","doi-asserted-by":"crossref","unstructured":"Zhu L, Yang Y (2018) Compound memory networks for few-shot video classification. In: Proceedings of the European conference on computer vision (ECCV), pp 751\u2013766","DOI":"10.1007\/978-3-030-01234-2_46"},{"key":"1571_CR34","doi-asserted-by":"crossref","unstructured":"Zhang H, Zhang L, Qi X, Li H, Torr PH, Koniusz P (2020) Few-shot action recognition with permutation-invariant attention. In: European conference on computer vision, vol 1. Springer, London, pp 525\u2013542","DOI":"10.1007\/978-3-030-58558-7_31"},{"key":"1571_CR35","first-page":"24581","volume":"34","author":"S Laenen","year":"2021","unstructured":"Laenen S, Bertinetto L (2021) On episodes, prototypical networks, and few-shot learning. Adv Neural Inform Process Syst 34:24581\u201324592","journal-title":"Adv Neural Inform Process Syst"},{"key":"1571_CR36","unstructured":"Bishay M, Zoumpourlis G, Patras I (2019) Tarn: temporal attentive relation network for few-shot and zero-shot action recognition. Preprint arXiv:1907.09021"},{"key":"1571_CR37","doi-asserted-by":"crossref","unstructured":"Liu H, Liu F, Fan X, Huang D (2021) Polarized self-attention: towards high-quality pixel-wise regression. Preprint arXiv:2107.00782","DOI":"10.1016\/j.neucom.2022.07.054"},{"key":"1571_CR38","doi-asserted-by":"crossref","unstructured":"Sun Q, Liu Y, Chua T-S, Schiele B (2019) Meta-transfer learning for few-shot learning. 
In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 403\u2013412","DOI":"10.1109\/CVPR.2019.00049"},{"key":"1571_CR39","first-page":"1","volume":"30","author":"A Vaswani","year":"2017","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser \u0141, Polosukhin I (2017) Attention is all you need. Adv Neural Inform Process Syst 30:1","journal-title":"Adv Neural Inform Process Syst"},{"issue":"1","key":"1571_CR40","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1007\/s10479-005-5724-z","volume":"134","author":"P-T De Boer","year":"2005","unstructured":"De Boer P-T, Kroese DP, Mannor S, Rubinstein RY (2005) A tutorial on the cross-entropy method. Ann Oper Res 134(1):19\u201367","journal-title":"Ann Oper Res"},{"key":"1571_CR41","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"1571_CR42","unstructured":"Brown TB, Mann B, Ryder N, Subbiah M, Kaplan J, Dhariwal P, Neelakantan A, Shyam P, Sastry G, Askell A, Agarwal S, Herbert-Voss A, Krueger G, Henighan T, Child R, Ramesh A, Ziegler DM, Wu J, Winter C, Hesse C, Chen M, Sigler E, Litwin M, Gray S, Chess B, Clark J, Berner C, McCandlish S, Radford A, Sutskever I, Amodei D (2020) Language models are few-shot learners. arXiv:2005.14165"},{"key":"1571_CR43","doi-asserted-by":"crossref","unstructured":"Khachatryan L, Movsisyan A, Tadevosyan V, Henschel R, Wang Z, Navasardyan S, Shi S (2023) Text2video-zero: text-to-image diffusion models are zero-shot video generators. Preprint arXiv:2303.13439","DOI":"10.1109\/ICCV51070.2023.01462"},{"key":"1571_CR44","doi-asserted-by":"crossref","unstructured":"Wang X, Li Y, Zhang H, Shan Y (2021) Towards real-world blind face restoration with generative facial prior. In: The IEEE conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR46437.2021.00905"},{"key":"1571_CR45","unstructured":"Radford A, Kim JW, Hallacy C, Ramesh A, Goh G, Agarwal S, Sastry G, Askell A, Mishkin P, Clark J et\u00a0al (2021) Learning transferable visual models from natural language supervision. 
In: International conference on machine learning, PMLR, pp 8748\u20138763"}],"container-title":["Complex &amp; Intelligent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40747-024-01571-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s40747-024-01571-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s40747-024-01571-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,16]],"date-time":"2024-10-16T22:19:10Z","timestamp":1729117150000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s40747-024-01571-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,19]]},"references-count":45,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2024,12]]}},"alternative-id":["1571"],"URL":"https:\/\/doi.org\/10.1007\/s40747-024-01571-4","relation":{},"ISSN":["2199-4536","2198-6053"],"issn-type":[{"value":"2199-4536","type":"print"},{"value":"2198-6053","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,8,19]]},"assertion":[{"value":"7 August 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 July 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 August 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"No conflict of financial interests or personal relationships exist in the submission of this manuscript, and it is approved by all authors for publication.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}