{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T22:16:21Z","timestamp":1768342581438,"version":"3.49.0"},"reference-count":92,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2022,10,7]],"date-time":"2022-10-07T00:00:00Z","timestamp":1665100800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,10,7]],"date-time":"2022-10-07T00:00:00Z","timestamp":1665100800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2023,1]]},"DOI":"10.1007\/s00521-022-07826-w","type":"journal-article","created":{"date-parts":[[2022,10,7]],"date-time":"2022-10-07T19:02:42Z","timestamp":1665169362000},"page":"2007-2024","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["FT-HID: a large-scale RGB-D dataset for first- and third-person human interaction analysis"],"prefix":"10.1007","volume":"35","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2450-492X","authenticated-orcid":false,"given":"Zihui","family":"Guo","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yonghong","family":"Hou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Pichao","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhimin","family":"Gao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mingliang","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wanqing","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,10,7]]},"reference":[{"key":"7826_CR1","doi-asserted-by":"crossref","unstructured":"Asadi-Aghbolaghi M, Bertiche H, Roig V, Kasaei S, Escalera S (2017) Action recognition from rgb-d data: Comparison and fusion of spatio-temporal handcrafted features and deep strategies. In: Proceedings of the IEEE International conference on computer vision workshops, pp. 3179\u20133188","DOI":"10.1109\/ICCVW.2017.376"},{"key":"7826_CR2","doi-asserted-by":"crossref","unstructured":"Ben Tanfous A, Drira H, Ben Amor B (2018) Coding kendall\u2019s shape trajectories for 3d action recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 2840\u20132849","DOI":"10.1109\/CVPR.2018.00300"},{"key":"7826_CR3","doi-asserted-by":"crossref","unstructured":"Bloom V, Argyriou V, Makris D (2013) Dynamic feature selection for online action recognition. In: international workshop on human behavior understanding, pp. 64\u201376. Springer","DOI":"10.1007\/978-3-319-02714-2_6"},{"key":"7826_CR4","doi-asserted-by":"crossref","unstructured":"Bloom V, Argyriou V, Makris D (2014) G3di: A gaming interaction dataset with a real time detection and evaluation framework. In: European conference on computer vision, pp. 698\u2013712. Springer","DOI":"10.1007\/978-3-319-16178-5_49"},{"key":"7826_CR5","doi-asserted-by":"crossref","unstructured":"Bloom V, Makris D, Argyriou V (2012) G3d: a gaming action dataset and real time action recognition evaluation framework. In: 2012 IEEE Computer society conference on computer vision and pattern recognition workshops, pp. 7\u201312. IEEE","DOI":"10.1109\/CVPRW.2012.6239175"},{"issue":"11","key":"7826_CR6","doi-asserted-by":"crossref","first-page":"3247","DOI":"10.1109\/TCSVT.2018.2879913","volume":"29","author":"C Cao","year":"2018","unstructured":"Cao C, Lan C, Zhang Y, Zeng W, Lu H, Zhang Y (2018) Skeleton-based action recognition with gated convolutional neural networks. IEEE Trans Circ Sys Video Tech 29(11):3247\u20133257","journal-title":"IEEE Trans Circ Sys Video Tech"},{"key":"7826_CR7","doi-asserted-by":"crossref","unstructured":"Carreira J, Zisserman A (2017) Quo vadis, action recognition? a new model and the kinetics dataset. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 6299\u20136308","DOI":"10.1109\/CVPR.2017.502"},{"key":"7826_CR8","doi-asserted-by":"crossref","unstructured":"Chen C, Jafari R, Kehtarnavaz N (2015) Action recognition from depth sequences using depth motion maps-based local binary patterns. In: 2015 IEEE winter conference on applications of computer vision, pp. 1092\u20131099. IEEE","DOI":"10.1109\/WACV.2015.150"},{"key":"7826_CR9","doi-asserted-by":"crossref","unstructured":"Chen C, Jafari R, Kehtarnavaz N (2015) Utd-mhad: A multimodal dataset for human action recognition utilizing a depth camera and a wearable inertial sensor. In: 2015 IEEE International conference on image processing (ICIP), pp. 168\u2013172. IEEE","DOI":"10.1109\/ICIP.2015.7350781"},{"key":"7826_CR10","doi-asserted-by":"crossref","unstructured":"Cherian A, Fernando B, Harandi M, Gould S (2017) Generalized rank pooling for activity recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 3222\u20133231","DOI":"10.1109\/CVPR.2017.172"},{"key":"7826_CR11","doi-asserted-by":"crossref","unstructured":"Ding Z, Wang P, Ogunbona PO, Li W (2017) Investigation of different skeleton features for cnn-based 3d action recognition. In: 2017 IEEE International conference on multimedia & expo workshops (ICMEW), pp. 617\u2013622. IEEE","DOI":"10.1109\/ICMEW.2017.8026286"},{"issue":"2","key":"7826_CR12","doi-asserted-by":"crossref","first-page":"363","DOI":"10.1109\/TMM.2018.2859620","volume":"21","author":"Z Fan","year":"2018","unstructured":"Fan Z, Zhao X, Lin T, Su H (2018) Attention-based multiview re-observation fusion network for skeletal action recognition. IEEE Trans Multim 21(2):363\u2013374","journal-title":"IEEE Trans Multim"},{"key":"7826_CR13","doi-asserted-by":"crossref","unstructured":"Fathi A, Ren X, Rehg JM (2011) Learning to recognize objects in egocentric activities. In: CVPR 2011, pp. 3281\u20133288. IEEE","DOI":"10.1109\/CVPR.2011.5995444"},{"key":"7826_CR14","doi-asserted-by":"crossref","unstructured":"Feichtenhofer C, Pinz A, Zisserman A (2016) Convolutional two-stream network fusion for video action recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 1933\u20131941","DOI":"10.1109\/CVPR.2016.213"},{"issue":"4","key":"7826_CR15","doi-asserted-by":"crossref","first-page":"773","DOI":"10.1109\/TPAMI.2016.2558148","volume":"39","author":"B Fernando","year":"2016","unstructured":"Fernando B, Gavves E, Oramas J, Ghodrati A, Tuytelaars T (2016) Rank pooling for action recognition. IEEE Trans Patt Anal Mach Intell 39(4):773\u2013787","journal-title":"IEEE Trans Patt Anal Mach Intell"},{"key":"7826_CR16","doi-asserted-by":"crossref","unstructured":"Gao X, Hu W, Tang J, Liu J, Guo Z (2019) Optimized skeleton-based action recognition via sparsified graph regression. In: Proceedings of the 27th ACM international conference on multimedia, pp. 601\u2013610. ACM","DOI":"10.1145\/3343031.3351170"},{"key":"7826_CR17","doi-asserted-by":"crossref","first-page":"442","DOI":"10.1016\/j.jvcir.2017.03.014","volume":"48","author":"Z Gao","year":"2017","unstructured":"Gao Z, Li S, Zhu Y, Wang C, Zhang H (2017) Collaborative sparse representation leaning model for rgbd action recognition. J Visual Commun Image Represent 48:442\u2013452","journal-title":"J Visual Commun Image Represent"},{"key":"7826_CR18","doi-asserted-by":"crossref","first-page":"554","DOI":"10.1016\/j.neucom.2014.06.085","volume":"151","author":"Z Gao","year":"2015","unstructured":"Gao Z, Zhang H, Xu G, Xue Y (2015) Multi-perspective and multi-modality joint representation and recognition model for 3d action recognition. Neurocomputing 151:554\u2013564","journal-title":"Neurocomputing"},{"key":"7826_CR19","doi-asserted-by":"crossref","unstructured":"Garcia NC, Morerio P, Murino V (2018) Modality distillation with multiple stream networks for action recognition. In: Proceedings of the european conference on computer vision (ECCV), pp. 103\u2013118","DOI":"10.1007\/978-3-030-01237-3_7"},{"key":"7826_CR20","doi-asserted-by":"crossref","unstructured":"Garcia-Hernando G, Yuan S, Baek S, Kim TK (2018) First-person hand action benchmark with rgb-d videos and 3d hand pose annotations. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 409\u2013419","DOI":"10.1109\/CVPR.2018.00050"},{"issue":"23","key":"7826_CR21","doi-asserted-by":"crossref","first-page":"16439","DOI":"10.1007\/s00521-021-06239-5","volume":"33","author":"Y Hou","year":"2021","unstructured":"Hou Y, Yu H, Zhou D, Wang P, Ge H, Zhang J, Zhang Q (2021) Local-aware spatio-temporal attention network with multi-stage feature fusion for human action recognition. Neur Comp Appl 33(23):16439\u201316450","journal-title":"Neur Comp Appl"},{"key":"7826_CR22","doi-asserted-by":"crossref","unstructured":"Hu JF, Zheng WS, Lai J, Zhang J (2015) Jointly learning heterogeneous features for rgb-d activity recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 5344\u20135352","DOI":"10.1109\/CVPR.2015.7299172"},{"key":"7826_CR23","doi-asserted-by":"crossref","unstructured":"Hu JF, Zheng WS, Pan J, Lai J, Zhang J (2018) Deep bilinear learning for rgb-d action recognition. In: Proceedings of the European conference on computer vision (ECCV), pp. 335\u2013351","DOI":"10.1007\/978-3-030-01234-2_21"},{"key":"7826_CR24","doi-asserted-by":"crossref","unstructured":"Huang D, Yao S, Wang Y, De La Torre, F (2014) Sequential max-margin event detectors. In: European conference on computer vision, pp. 410\u2013424. Springer","DOI":"10.1007\/978-3-319-10578-9_27"},{"key":"7826_CR25","doi-asserted-by":"crossref","first-page":"504","DOI":"10.1016\/j.patcog.2017.07.013","volume":"72","author":"EP Ijjina","year":"2017","unstructured":"Ijjina EP, Chalavadi KM (2017) Human action recognition in rgb-d videos using motion sequence information and deep learning. Patt Recognit 72:504\u2013516","journal-title":"Patt Recognit"},{"key":"7826_CR26","doi-asserted-by":"crossref","unstructured":"Imran J, Kumar P (2016) Human action recognition using rgb-d sensor and deep convolutional neural networks. In: 2016 International conference on advances in computing, communications and informatics (ICACCI), pp. 144\u2013148. IEEE","DOI":"10.1109\/ICACCI.2016.7732038"},{"key":"7826_CR27","doi-asserted-by":"crossref","unstructured":"Ji Y, Xu F, Yang Y, Shen F, Shen HT, Zheng WS (2018) A large-scale rgb-d database for arbitrary-view human action recognition. In: 2018 ACM Multimedia conference on multimedia conference, pp. 1510\u20131518. ACM","DOI":"10.1145\/3240508.3240675"},{"issue":"10","key":"7826_CR28","doi-asserted-by":"crossref","first-page":"4641","DOI":"10.1109\/TIP.2016.2589320","volume":"25","author":"C Jia","year":"2016","unstructured":"Jia C, Fu Y (2016) Low-rank tensor subspace learning for rgb-d action recognition. IEEE Trans Image Process 25(10):4641\u20134652","journal-title":"IEEE Trans Image Process"},{"key":"7826_CR29","doi-asserted-by":"crossref","unstructured":"Joachims T (2006) Training linear svms in linear time. In: Proceedings of the 12th ACM SIGKDD International conference on Knowledge discovery and data mining, pp. 217\u2013226","DOI":"10.1145\/1150402.1150429"},{"issue":"14","key":"7826_CR30","doi-asserted-by":"crossref","first-page":"10423","DOI":"10.1007\/s00521-019-04578-y","volume":"32","author":"SA Khowaja","year":"2020","unstructured":"Khowaja SA, Lee SL (2020) Hybrid and hierarchical fusion networks: a deep cross-modal learning architecture for action recognition. Neur Comp Appl 32(14):10423\u201310434","journal-title":"Neur Comp Appl"},{"key":"7826_CR31","doi-asserted-by":"crossref","first-page":"537","DOI":"10.1016\/j.jvcir.2019.02.013","volume":"59","author":"J Kong","year":"2019","unstructured":"Kong J, Liu T, Jiang M (2019) Collaborative multimodal feature learning for rgb-d action recognition. J Visu Commun Image Represent 59:537\u2013549","journal-title":"J Visu Commun Image Represent"},{"key":"7826_CR32","doi-asserted-by":"crossref","unstructured":"Kong Y, Fu Y (2015) Bilinear heterogeneous information machine for rgb-d action recognition. In: Proceedings of the IEEE Conference on computer vision and pattern recognition, pp. 1054\u20131062","DOI":"10.1109\/CVPR.2015.7298708"},{"issue":"6","key":"7826_CR33","doi-asserted-by":"crossref","first-page":"2856","DOI":"10.1109\/TIP.2016.2556940","volume":"25","author":"Y Kong","year":"2016","unstructured":"Kong Y, Fu Y (2016) Discriminative relational representation learning for rgb-d action recognition. IEEE Trans Image Process 25(6):2856\u20132865","journal-title":"IEEE Trans Image Process"},{"issue":"3","key":"7826_CR34","doi-asserted-by":"crossref","first-page":"350","DOI":"10.1007\/s11263-016-0982-6","volume":"123","author":"Y Kong","year":"2017","unstructured":"Kong Y, Fu Y (2017) Max-margin heterogeneous information machine for rgb-d action recognition. Int J Comp Vision 123(3):350\u2013371","journal-title":"Int J Comp Vision"},{"key":"7826_CR35","doi-asserted-by":"crossref","unstructured":"Koperski M, Bremond, F (2016) Modeling spatial layout of features for real world scenario rgb-d action recognition. In: 2016 13th IEEE international conference on advanced video and signal based surveillance (AVSS), pp. 44\u201350. IEEE","DOI":"10.1109\/AVSS.2016.7738023"},{"key":"7826_CR36","doi-asserted-by":"crossref","unstructured":"Li B, Li X, Zhang Z, Wu F (2019) Spatio-temporal graph routing for skeleton-based action recognition. In Proceedings of the AAAI Conference on Artificial Intelligence 33(1), pp 8561-8568","DOI":"10.1609\/aaai.v33i01.33018561"},{"issue":"5","key":"7826_CR37","doi-asserted-by":"crossref","first-page":"624","DOI":"10.1109\/LSP.2017.2678539","volume":"24","author":"C Li","year":"2017","unstructured":"Li C, Hou Y, Wang P, Li W (2017) Joint distance maps based action recognition with convolutional neural networks. IEEE Sign Process Lett 24(5):624\u2013628","journal-title":"IEEE Sign Process Lett"},{"key":"7826_CR38","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2021.3126637","author":"C Li","year":"2021","unstructured":"Li C, Li S, Gao Y, Zhang X, Li W (2021) A two-stream neural network for pose-based hand gesture recognition. IEEE Trans Cognit Develop Sys. https:\/\/doi.org\/10.1109\/TCDS.2021.3126637","journal-title":"IEEE Trans Cognit Develop Sys"},{"key":"7826_CR39","doi-asserted-by":"crossref","unstructured":"Li M, Chen S, Chen X, Zhang Y, Wang Y, Tian, Q (2019) Actional-structural graph convolutional networks for skeleton-based action recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 3595\u20133603","DOI":"10.1109\/CVPR.2019.00371"},{"key":"7826_CR40","doi-asserted-by":"crossref","unstructured":"Li W, Zhang Z, Liu Z (2010) Action recognition based on a bag of 3d points. In: 2010 IEEE computer society conference on computer vision and pattern recognition-workshops, pp. 9\u201314. IEEE","DOI":"10.1109\/CVPRW.2010.5543273"},{"key":"7826_CR41","doi-asserted-by":"crossref","first-page":"107293","DOI":"10.1016\/j.patcog.2020.107293","volume":"103","author":"Y Li","year":"2020","unstructured":"Li Y, Xia R, Liu X (2020) Learning shape and motion representations for view invariant skeleton-based action recognition. Patt Recognit 103:107293","journal-title":"Patt Recognit"},{"key":"7826_CR42","doi-asserted-by":"crossref","first-page":"74","DOI":"10.1016\/j.sigpro.2014.08.038","volume":"112","author":"AA Liu","year":"2015","unstructured":"Liu AA, Nie WZ, Su YT, Ma L, Hao T, Yang ZX (2015) Coupled hidden conditional random fields for rgb-d human action recognition. Sig Process 112:74\u201382","journal-title":"Sig Process"},{"issue":"7","key":"7826_CR43","doi-asserted-by":"crossref","first-page":"1781","DOI":"10.1109\/TCYB.2016.2582918","volume":"47","author":"AA Liu","year":"2016","unstructured":"Liu AA, Xu N, Nie WZ, Su YT, Wong Y, Kankanhalli M (2016) Benchmarking a multimodal and multiview and interactive dataset for human action recognition. IEEE Trans Cybern 47(7):1781\u20131794","journal-title":"IEEE Trans Cybern"},{"key":"7826_CR44","doi-asserted-by":"crossref","first-page":"544","DOI":"10.1016\/j.neucom.2014.04.090","volume":"151","author":"AA Liu","year":"2015","unstructured":"Liu AA, Xu N, Su YT, Lin H, Hao T, Yang ZX (2015) Single\/multi-view human action recognition via regularized multi-task learning. Neurocomputing 151:544\u2013553","journal-title":"Neurocomputing"},{"key":"7826_CR45","doi-asserted-by":"crossref","first-page":"79","DOI":"10.1016\/j.neucom.2013.12.061","volume":"149","author":"H Liu","year":"2015","unstructured":"Liu H, Yuan M, Sun F (2015) Rgb-d action recognition using linear coding. Neurocomputing 149:79\u201385","journal-title":"Neurocomputing"},{"key":"7826_CR46","doi-asserted-by":"crossref","first-page":"70061","DOI":"10.1109\/ACCESS.2018.2880231","volume":"6","author":"J Liu","year":"2018","unstructured":"Liu J, Akhtar N, Ajmal M (2018) Viewpoint invariant action recognition using rgb-d videos. IEEE Access 6:70061\u201370071","journal-title":"IEEE Access"},{"issue":"10","key":"7826_CR47","doi-asserted-by":"crossref","first-page":"2684","DOI":"10.1109\/TPAMI.2019.2916873","volume":"42","author":"J Liu","year":"2019","unstructured":"Liu J, Shahroudy A, Perez ML, Wang G, Duan LY, Chichung AK (2019) Ntu rgb+ d 120: a large-scale benchmark for 3d human activity understanding. IEEE Trans Patt Anal Mach Intell 42(10):2684\u20132701","journal-title":"IEEE Trans Patt Anal Mach Intell"},{"issue":"18","key":"7826_CR48","doi-asserted-by":"crossref","first-page":"14593","DOI":"10.1007\/s00521-020-05144-7","volume":"32","author":"Z Liu","year":"2020","unstructured":"Liu Z, Li Z, Wang R, Zong M, Ji W (2020) Spatiotemporal saliency-based multi-stream networks with attention-aware lstm for action recognition. Neur Comp Appl 32(18):14593\u201314602","journal-title":"Neur Comp Appl"},{"issue":"4","key":"7826_CR49","doi-asserted-by":"crossref","first-page":"1226","DOI":"10.1109\/TSMCB.2012.2226879","volume":"43","author":"A Mansur","year":"2012","unstructured":"Mansur A, Makihara Y, Yagi Y (2012) Inverse dynamics for action recognition. IEEE Trans Cybern 43(4):1226\u20131236","journal-title":"IEEE Trans Cybern"},{"key":"7826_CR50","doi-asserted-by":"crossref","unstructured":"Moghimi M, Azagra P, Montesano L, Murillo AC, Belongie S (2014) Experiments on an rgb-d wearable vision system for egocentric activity recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition workshops, pp. 597\u2013603","DOI":"10.1109\/CVPRW.2014.94"},{"key":"7826_CR51","doi-asserted-by":"crossref","unstructured":"Negin F, \u00d6zdemir, F, Akg\u00fcl CB, Y\u00fcksel KA, Er\u00e7il, A (2013) A decision forest based feature selection framework for action recognition from rgb-depth cameras. In: International conference image analysis and recognition, pp. 648\u2013657. Springer","DOI":"10.1007\/978-3-642-39094-4_74"},{"issue":"8","key":"7826_CR52","doi-asserted-by":"crossref","first-page":"3959","DOI":"10.1109\/TIP.2019.2907048","volume":"28","author":"Q Nie","year":"2019","unstructured":"Nie Q, Wang J, Wang X, Liu Y (2019) View-invariant human action recognition based on a 3d bio-constrained skeleton model. IEEE Trans Image Process 28(8):3959\u20133972","journal-title":"IEEE Trans Image Process"},{"key":"7826_CR53","doi-asserted-by":"crossref","unstructured":"Oreifej O, Liu Z (2013) Hon4d: histogram of oriented 4d normals for activity recognition from depth sequences. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 716\u2013723","DOI":"10.1109\/CVPR.2013.98"},{"key":"7826_CR54","doi-asserted-by":"crossref","unstructured":"Pirsiavash H, Ramanan D (2012) Detecting activities of daily living in first-person camera views. In: 2012 IEEE conference on computer vision and pattern recognition, pp. 2847\u20132854. IEEE","DOI":"10.1109\/CVPR.2012.6248010"},{"key":"7826_CR55","doi-asserted-by":"crossref","unstructured":"Rahmani H, Mahmood A, Huynh DQ, Mian A (2014) Hopc: Histogram of oriented principal components of 3d pointclouds for action recognition. In: European conference on computer vision, pp. 742\u2013757. Springer","DOI":"10.1007\/978-3-319-10605-2_48"},{"issue":"7","key":"7826_CR56","doi-asserted-by":"crossref","first-page":"530","DOI":"10.1049\/iet-cvi.2016.0326","volume":"11","author":"B Seddik","year":"2017","unstructured":"Seddik B, Gazzah S, Amara NEB (2017) Human-action recognition using a multi-layered fusion scheme of kinect modalities. IET Comp Vision 11(7):530\u2013540","journal-title":"IET Comp Vision"},{"key":"7826_CR57","doi-asserted-by":"crossref","unstructured":"Shahroudy A, Liu J, Ng TT Wang G (2016) Ntu rgb+ d: a large scale dataset for 3d human activity analysis. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 1010\u20131019","DOI":"10.1109\/CVPR.2016.115"},{"issue":"5","key":"7826_CR58","doi-asserted-by":"crossref","first-page":"1045","DOI":"10.1109\/TPAMI.2017.2691321","volume":"40","author":"A Shahroudy","year":"2017","unstructured":"Shahroudy A, Ng TT, Gong Y, Wang G (2017) Deep multimodal feature analysis for action recognition in rgb+ d videos. IEEE Trans Patt Anal Mach Intell 40(5):1045\u20131058","journal-title":"IEEE Trans Patt Anal Mach Intell"},{"issue":"1","key":"7826_CR59","doi-asserted-by":"crossref","first-page":"160","DOI":"10.1109\/TCSVT.2020.2965574","volume":"31","author":"Z Shao","year":"2020","unstructured":"Shao Z, Li Y, Zhang H (2020) Learning representations from skeletal self-similarities for cross-view action recognition. IEEE Trans Circ Sys Video Tech 31(1):160\u2013174","journal-title":"IEEE Trans Circ Sys Video Tech"},{"key":"7826_CR60","doi-asserted-by":"crossref","unstructured":"Shi L, Zhang Y, Cheng J, Lu, H (2019) Skeleton-based action recognition with directed graph neural networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 7912\u20137921","DOI":"10.1109\/CVPR.2019.00810"},{"key":"7826_CR61","doi-asserted-by":"crossref","unstructured":"Shi L, Zhang Y, Cheng J, Lu H (2019) Two-stream adaptive graph convolutional networks for skeleton-based action recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 12026\u201312035","DOI":"10.1109\/CVPR.2019.01230"},{"key":"7826_CR62","unstructured":"Simonyan K, Zisserman A (2014) Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556"},{"key":"7826_CR63","doi-asserted-by":"crossref","unstructured":"Song S, Cheung NM, Chandrasekhar V, Mandal B, Liri J (2016) Egocentric activity recognition with multimodal fisher vector. In: 2016 IEEE International conference on acoustics, speech and signal processing (ICASSP), pp. 2717\u20132721. IEEE","DOI":"10.1109\/ICASSP.2016.7472171"},{"issue":"4","key":"7826_CR64","doi-asserted-by":"crossref","first-page":"426","DOI":"10.1109\/LSP.2014.2361901","volume":"22","author":"Y Song","year":"2014","unstructured":"Song Y, Liu S, Tang J (2014) Describing trajectory of surface patch for human action recognition on rgb and depth videos. IEEE Sig Process Lett 22(4):426\u2013429","journal-title":"IEEE Sig Process Lett"},{"issue":"10","key":"7826_CR65","doi-asserted-by":"crossref","first-page":"3001","DOI":"10.1109\/TCSVT.2018.2875441","volume":"29","author":"Y Tang","year":"2018","unstructured":"Tang Y, Wang Z, Lu J, Feng J, Zhou J (2018) Multi-stream deep neural networks for rgb-d egocentric action recognition. IEEE Trans Circ Sys Video Tech 29(10):3001\u20133015","journal-title":"IEEE Trans Circ Sys Video Tech"},{"key":"7826_CR66","doi-asserted-by":"crossref","unstructured":"Van Gemeren C, Tan RT, Poppe R, Veltkamp RC (2014) Dyadic interaction detection from pose and flow. In: International workshop on human behavior understanding, pp. 101\u2013115. Springer","DOI":"10.1007\/978-3-319-11839-0_9"},{"key":"7826_CR67","doi-asserted-by":"crossref","unstructured":"Vernikos I, Mathe E, Papadakis A, Spyrou E, Mylonas P (2019) An image representation of skeletal data for action recognition using convolutional neural networks. In: Proceedings of the 12th ACM International conference on pervasive technologies related to assistive environments, pp. 325\u2013326. ACM","DOI":"10.1145\/3316782.3322740"},{"issue":"9","key":"7826_CR68","doi-asserted-by":"crossref","first-page":"4382","DOI":"10.1109\/TIP.2018.2837386","volume":"27","author":"H Wang","year":"2018","unstructured":"Wang H, Wang L (2018) Beyond joints: learning representations from primitive geometries for skeleton-based action recognition and detection. IEEE Trans Image Process 27(9):4382\u20134394","journal-title":"IEEE Trans Image Process"},{"key":"7826_CR69","unstructured":"Wang J, Liu Z, Wu Y, Yuan J (2012) Mining actionlet ensemble for action recognition with depth cameras. In: 2012 IEEE Conference on computer vision and pattern recognition, pp. 1290\u20131297. IEEE"},{"key":"7826_CR70","doi-asserted-by":"crossref","unstructured":"Wang J, Nie X, Xia Y, Wu Y, Zhu, SC (2014) Cross-view action modeling, learning and recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 2649\u20132656","DOI":"10.1109\/CVPR.2014.339"},{"key":"7826_CR71","doi-asserted-by":"crossref","unstructured":"Wang K, Wang X, Lin L, Wang M, Zuo W (2014) 3d human activity recognition with reconfigurable convolutional neural networks. In: Proceedings of the 22nd ACM international conference on Multimedia, pp. 97\u2013106","DOI":"10.1145\/2647868.2654912"},{"issue":"4","key":"7826_CR72","doi-asserted-by":"crossref","first-page":"498","DOI":"10.1109\/THMS.2015.2504550","volume":"46","author":"P Wang","year":"2015","unstructured":"Wang P, Li W, Gao Z, Zhang J, Tang C, Ogunbona PO (2015) Action recognition from depth maps using deep convolutional neural networks. IEEE Trans Human-Mach Sys 46(4):498\u2013509","journal-title":"IEEE Trans Human-Mach Sys"},{"key":"7826_CR73","doi-asserted-by":"crossref","first-page":"43","DOI":"10.1016\/j.knosys.2018.05.029","volume":"158","author":"P Wang","year":"2018","unstructured":"Wang P, Li W, Li C, Hou Y (2018) Action recognition based on joint trajectory maps with convolutional neural networks. Knowl-Based Sys 158:43\u201353","journal-title":"Knowl-Based Sys"},{"key":"7826_CR74","doi-asserted-by":"crossref","unstructured":"Wang P, Li W, Wan J, Ogunbona P, Liu X (2018) Cooperative training of deep aggregation networks for rgb-d action recognition. In: Thirty-Second AAAI conference on artificial intelligence","DOI":"10.1609\/aaai.v32i1.12228"},{"key":"7826_CR75","doi-asserted-by":"crossref","unstructured":"Wei P, Zhao Y, Zheng N, Zhu SC (2013) Modeling 4d human-object interactions for event and object recognition. In: Proceedings of the IEEE international conference on computer vision, pp. 3272\u20133279","DOI":"10.1109\/ICCV.2013.406"},{"key":"7826_CR76","doi-asserted-by":"crossref","unstructured":"Wen YH, Gao L, Fu H, Zhang FL Xia S (2019) Graph cnns with motif and variable temporal block for skeleton-based action recognition. In: Proceedings of the AAAI conference on artificial intelligence, vol. 33, pp. 8989\u20138996","DOI":"10.1609\/aaai.v33i01.33018989"},{"key":"7826_CR77","doi-asserted-by":"crossref","first-page":"14","DOI":"10.1016\/j.cviu.2014.06.014","volume":"127","author":"C Wolf","year":"2014","unstructured":"Wolf C, Lombardi E, Mille J, Celiktutan O, Jiu M, Dogan E, Eren G, Baccouche M, Dellandr\u00e9a E, Bichot CE et al (2014) Evaluation of video activity localizations integrating quality and quantity measurements. Comp Vis Image Underst 127:14\u201330","journal-title":"Comp Vis Image Underst"},{"key":"7826_CR78","doi-asserted-by":"crossref","unstructured":"Xia L, Chen CC, Aggarwal JK (2012) View invariant human action recognition using histograms of 3d joints. In: 2012 IEEE Computer society conference on computer vision and pattern recognition workshops, pp. 20\u201327. IEEE","DOI":"10.1109\/CVPRW.2012.6239233"},{"key":"7826_CR79","doi-asserted-by":"crossref","unstructured":"Xia L, Gori I, Aggarwal JK, Ryoo MS (2015) Robot-centric activity recognition from first-person rgb-d videos. In: 2015 IEEE winter conference on applications of computer vision, pp. 357\u2013364. IEEE","DOI":"10.1109\/WACV.2015.54"},{"key":"7826_CR80","unstructured":"Xingjian S, Chen Z, Wang H, Yeung DY, Wong WK, Woo Wc (2015) Convolutional lstm network: a machine learning approach for precipitation nowcasting. In: Advances in neural information processing systems, pp. 802\u2013810"},{"key":"7826_CR81","doi-asserted-by":"crossref","unstructured":"Xu N, Liu A, Nie W, Wong Y, Li F, Su Y (2015) Multi-modal & multi-view & interactive benchmark dataset for human action recognition. In: Proceedings of the 23rd ACM international conference on Multimedia, pp. 1195\u20131198","DOI":"10.1145\/2733373.2806315"},{"issue":"8","key":"7826_CR82","doi-asserted-by":"crossref","first-page":"2405","DOI":"10.1109\/TCSVT.2018.2864148","volume":"29","author":"Z Yang","year":"2018","unstructured":"Yang Z, Li Y, Yang J, Luo J (2018) Action recognition with spatio-temporal visual attention on skeleton image sequences. IEEE Trans Circ Sys Video Tech 29(8):2405\u20132415","journal-title":"IEEE Trans Circ Sys Video Tech"},{"issue":"8","key":"7826_CR83","doi-asserted-by":"crossref","first-page":"1651","DOI":"10.1109\/TPAMI.2015.2491925","volume":"38","author":"M Yu","year":"2015","unstructured":"Yu M, Liu L, Shao L (2015) Structure-preserving binary representations for rgb-d action recognition. IEEE Trans Patt Anal Mach Intell 38(8):1651\u20131664","journal-title":"IEEE Trans Patt Anal Mach Intell"},{"issue":"8","key":"7826_CR84","doi-asserted-by":"crossref","first-page":"1651","DOI":"10.1109\/TPAMI.2015.2491925","volume":"38","author":"M Yu","year":"2016","unstructured":"Yu M, Liu L, Shao L (2016) Structure-preserving binary representations for rgb-d action recognition. IEEE Trans Patt Anal Mach Intell 38(8):1651\u20131664","journal-title":"IEEE Trans Patt Anal Mach Intell"},{"key":"7826_CR85","doi-asserted-by":"crossref","unstructured":"Yun K, Honorio J, Chattopadhyay D, Berg TL, Samaras D (2012) Two-person interaction detection using body-pose features and multiple instance learning. In: 2012 IEEE Computer society conference on computer vision and pattern recognition workshops, pp. 28\u201335. IEEE","DOI":"10.1109\/CVPRW.2012.6239234"},{"issue":"4","key":"7826_CR86","first-page":"12","volume":"2","author":"C Zhang","year":"2012","unstructured":"Zhang C, Tian Y (2012) Rgb-d camera-based daily living activity recognition. J Comp Vis Image Process 2(4):12","journal-title":"J Comp Vis Image Process"},{"issue":"8","key":"7826_CR87","doi-asserted-by":"crossref","first-page":"1963","DOI":"10.1109\/TPAMI.2019.2896631","volume":"41","author":"P Zhang","year":"2019","unstructured":"Zhang P, Lan C, Xing J, Zeng W, Xue J, Zheng N (2019) View adaptive neural networks for high performance skeleton-based human action recognition. IEEE Trans Patt Anal Mach Intell 41(8):1963\u20131978","journal-title":"IEEE Trans Patt Anal Mach Intell"},{"issue":"9","key":"7826_CR88","doi-asserted-by":"crossref","first-page":"2330","DOI":"10.1109\/TMM.2018.2802648","volume":"20","author":"S Zhang","year":"2018","unstructured":"Zhang S, Yang Y, Xiao J, Liu X, Yang Y, Xie D, Zhuang Y (2018) Fusing geometric features for skeleton-based action recognition using multilayer lstm networks. IEEE Trans Multim 20(9):2330\u20132343","journal-title":"IEEE Trans Multim"},{"issue":"5","key":"7826_CR89","doi-asserted-by":"crossref","first-page":"1038","DOI":"10.1109\/TMM.2018.2808769","volume":"20","author":"Y Zhang","year":"2018","unstructured":"Zhang Y, Cao C, Cheng J, Lu H (2018) Egogesture: a new dataset and benchmark for egocentric hand gesture recognition. IEEE Trans Multim 20(5):1038\u20131050","journal-title":"IEEE Trans Multim"},{"key":"7826_CR90","doi-asserted-by":"crossref","first-page":"4517","DOI":"10.1109\/ACCESS.2017.2684186","volume":"5","author":"G Zhu","year":"2017","unstructured":"Zhu G, Zhang L, Shen P, Song J (2017) Multimodal gesture recognition using 3-d convolution and convolutional lstm. IEEE Access 5:4517\u20134524","journal-title":"IEEE Access"},{"key":"7826_CR91","doi-asserted-by":"crossref","first-page":"101","DOI":"10.1016\/j.neucom.2018.08.018","volume":"317","author":"Z Zhu","year":"2018","unstructured":"Zhu Z, Ji H, Zhang W, Xu Y (2018) Rank pooling dynamic network: Learning end-to-end dynamic characteristic for action recognition. Neurocomputing 317:101\u2013109","journal-title":"Neurocomputing"},{"issue":"10","key":"7826_CR92","doi-asserted-by":"crossref","first-page":"5167","DOI":"10.1007\/s00521-020-05313-8","volume":"33","author":"M Zong","year":"2021","unstructured":"Zong M, Wang R, Chen Z, Wang M, Wang X, Potgieter J (2021) Multi-cue based 3d residual network for action recognition. Neur Comp Appl 33(10):5167\u20135181","journal-title":"Neur Comp Appl"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-022-07826-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-022-07826-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-022-07826-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,5]],"date-time":"2024-10-05T10:38:26Z","timestamp":1728124706000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-022-07826-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,7]]},"references-count":92,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2023,1]]}},"alternative-id":["7826"],"URL":"https:\/\/doi.org\/10.1007\/s00521-022-07826-w","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"value":"0941-0643","type":"print"},{"value":"1433-3058","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,10,7]]},"assertion":[{"value":"28 February 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 September 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 October 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}