{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T14:36:59Z","timestamp":1777127819147,"version":"3.51.4"},"reference-count":40,"publisher":"Institute of Electronics, Information and Communications Engineers (IEICE)","issue":"12","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEICE Trans. Fundamentals"],"published-print":{"date-parts":[[2025,12,1]]},"DOI":"10.1587\/transfun.2024eap1162","type":"journal-article","created":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T18:06:35Z","timestamp":1748801195000},"page":"1677-1686","source":"Crossref","is-referenced-by-count":1,"title":["Enhancing Skeleton-Based Action Recognition with Feature Maps from Pose Estimation Networks"],"prefix":"10.1587","volume":"E108.A","author":[{"given":"Hao","family":"WEN","sequence":"first","affiliation":[{"name":"The School of Aeronautics and Astronautics, Zhejiang University"}]},{"given":"Zhe-Ming","family":"LU","sequence":"additional","affiliation":[{"name":"The School of Aeronautics and Astronautics, Zhejiang University"}]},{"given":"Fengli","family":"SHEN","sequence":"additional","affiliation":[{"name":"Yangtze Delta Region Institute (Huzhou), University of Electronic Science and Technology of China"}]},{"given":"Ziqian","family":"LU","sequence":"additional","affiliation":[{"name":"The School of Aeronautics and Astronautics, Zhejiang University"}]},{"given":"Yangming","family":"ZHENG","sequence":"additional","affiliation":[{"name":"The School of Aeronautics and Astronautics, Zhejiang University"}]},{"given":"Jialin","family":"CUI","sequence":"additional","affiliation":[{"name":"The School of Information Science and Engineering, NingboTech University"}]}],"member":"532","reference":[{"key":"1","doi-asserted-by":"crossref","unstructured":"[1] S.S. Rautaray and A. 
Agrawal, \u201cInteraction with virtual game through hand gesture recognition,\u201d 2011 International Conference on Multimedia, Signal Processing and Communication Technologies, pp.244-247, IEEE, 2011. 10.1109\/mspct.2011.6150485","DOI":"10.1109\/MSPCT.2011.6150485"},{"key":"2","doi-asserted-by":"publisher","unstructured":"[2] C.I. Nwakanma, F.B. Islam, M.P. Maharani, J.M. Lee, and D.S. Kim, \u201cDetection and classification of human activity for emergency response in smart factory shop floor,\u201d Applied Sciences, vol.11, no.8, p.3662, 2021. 10.3390\/app11083662","DOI":"10.3390\/app11083662"},{"key":"3","doi-asserted-by":"publisher","unstructured":"[3] H. Wang and L. Wang, \u201cLearning content and style: Joint action recognition and person identification from human skeletons,\u201d Pattern Recognition, vol.81, pp.23-35, 2018. 10.1016\/j.patcog.2018.03.030","DOI":"10.1016\/j.patcog.2018.03.030"},{"key":"4","doi-asserted-by":"crossref","unstructured":"[4] H. Duan, Y. Zhao, K. Chen, D. Lin, and B. Dai, \u201cRevisiting skeleton-based action recognition,\u201d Proc. IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.2969-2978, 2022. 10.1109\/cvpr52688.2022.00298","DOI":"10.1109\/CVPR52688.2022.00298"},{"key":"5","doi-asserted-by":"publisher","unstructured":"[26] X. Bruce, Y. Liu, X. Zhang, S.h. Zhong, and K.C. Chan, \u201cMMNet: A model-based multimodal network for human action recognition in RGB-D videos,\u201d IEEE Trans. Pattern Anal. Mach. Intell., vol.45, no.3, pp.3522-3538, 2022. 10.1109\/tpami.2022.3177813","DOI":"10.1109\/TPAMI.2022.3177813"},{"key":"6","doi-asserted-by":"publisher","unstructured":"[6] B.X. Yu, Y. Liu, and K.C. Chan, \u201cMultimodal fusion via teacher-student network for indoor action recognition,\u201d Proc. AAAI Conference on Artificial Intelligence, vol.35, no.4, pp.3199-3207, May 2021. 10.1609\/aaai.v35i4.16430","DOI":"10.1609\/aaai.v35i4.16430"},{"key":"7","doi-asserted-by":"crossref","unstructured":"[7] S. 
Yan, Y. Xiong, and D. Lin, \u201cSpatial temporal graph convolutional networks for skeleton-based action recognition,\u201d CoRR, vol.abs\/1801.07455, 2018. 10.48550\/arXiv.1801.07455","DOI":"10.1609\/aaai.v32i1.12328"},{"key":"8","doi-asserted-by":"crossref","unstructured":"[8] H. Duan, J. Wang, K. Chen, and D. Lin, \u201cPYSKL: Towards good practices for skeleton action recognition,\u201d Proc. 30th ACM International Conference on Multimedia, pp.7351-7354, 2022. 10.1145\/3503161.3548546","DOI":"10.1145\/3503161.3548546"},{"key":"9","doi-asserted-by":"publisher","unstructured":"[9] L. Shi, Y. Zhang, J. Cheng, and H. Lu, \u201cSkeleton-based action recognition with multi-stream adaptive graph convolutional networks,\u201d IEEE Trans. Image Process., vol.29, pp.9532-9545, 2020. 10.1109\/tip.2020.3028207","DOI":"10.1109\/TIP.2020.3028207"},{"key":"10","unstructured":"[10] H. Duan, J. Wang, K. Chen, and D. Lin, \u201cDG-STGCN: Dynamic spatial-temporal modeling for skeleton-based action recognition,\u201d arXiv preprint, arXiv:2210.05895, 2022. 10.48550\/arXiv.2210.05895"},{"key":"11","doi-asserted-by":"publisher","unstructured":"[11] W. Myung, N. Su, J.H. Xue, and G. Wang, \u201cDeGCN: Deformable graph convolutional networks for skeleton-based action recognition,\u201d IEEE Trans. Image Process., vol.33, pp.2477-2490, 2024. 10.1109\/tip.2024.3378886","DOI":"10.1109\/TIP.2024.3378886"},{"key":"12","doi-asserted-by":"crossref","unstructured":"[12] J. Lee, M. Lee, D. Lee, and S. Lee, \u201cHierarchically decomposed graph convolutional networks for skeleton-based action recognition,\u201d arXiv preprint, arXiv:2208.10741, 2022. 10.48550\/arXiv.2208.10741","DOI":"10.1109\/ICCV51070.2023.00958"},{"key":"13","doi-asserted-by":"publisher","unstructured":"[13] J. Liu, X. Wang, C. Wang, Y. Gao, and M. Liu, \u201cTemporal decoupling graph convolutional network for skeleton-based gesture recognition,\u201d IEEE Trans. Multimedia, vol.26, pp.811-823, 2023. 
10.1109\/tmm.2023.3271811","DOI":"10.1109\/TMM.2023.3271811"},{"key":"14","doi-asserted-by":"crossref","unstructured":"[14] H.G. Chi, M.H. Ha, S. Chi, S.W. Lee, Q. Huang, and K. Ramani, \u201cInfoGCN: Representation learning for human skeleton-based action recognition,\u201d 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp.20154-20164, 2022. 10.1109\/cvpr52688.2022.01955","DOI":"10.1109\/CVPR52688.2022.01955"},{"key":"15","doi-asserted-by":"crossref","unstructured":"[15] Y. Chen, Z. Zhang, C. Yuan, B. Li, Y. Deng, and W. Hu, \u201cChannel-wise topology refinement graph convolution for skeleton-based action recognition,\u201d Proc. IEEE\/CVF International Conference on Computer Vision, pp.13359-13368, 2021. 10.1109\/iccv48922.2021.01311","DOI":"10.1109\/ICCV48922.2021.01311"},{"key":"16","unstructured":"[16] J. Do and M. Kim, \u201cSkateFormer: Skeletal-temporal transformer for human action recognition,\u201d arXiv preprint, arXiv:2403.09508, 2024. 10.48550\/arXiv.2403.09508"},{"key":"17","doi-asserted-by":"crossref","unstructured":"[17] M. Duhme, R. Memmesheimer, and D. Paulus, \u201cFusion-GCN: Multimodal action recognition using graph convolutional networks,\u201d Pattern Recognition, C. Bauckhage, J. Gall, and A. Schwing, eds., pp.265-281, Springer International Publishing, Cham, 2021. 10.1007\/978-3-030-92659-5_17","DOI":"10.1007\/978-3-030-92659-5_17"},{"key":"18","doi-asserted-by":"crossref","unstructured":"[18] H. Duan, Y. Zhao, K. Chen, D. Shao, D. Lin, and B. Dai, \u201cRevisiting skeleton-based action recognition,\u201d 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp.2959-2968, 2022. 10.1109\/cvpr52688.2022.00298","DOI":"10.1109\/CVPR52688.2022.00298"},{"key":"19","doi-asserted-by":"crossref","unstructured":"[19] D. Reilly and S. Das, \u201cJust add <i>\u03c0<\/i>! pose induced video transformers for understanding activities of daily living,\u201d arXiv preprint, arXiv:2311.18840, 2023. 
10.48550\/arXiv.2311.18840","DOI":"10.1109\/CVPR52733.2024.01736"},{"key":"20","doi-asserted-by":"publisher","unstructured":"[20] J. Liu, R. Ding, Y. Wen, N. Dai, F. Meng, F.L. Zhang, S. Zhao, and M. Liu, \u201cExplore human parsing modality for action recognition,\u201d CAAI Trans. Intelligence Technology (CAAI TIT), vol.9, no.6, pp.1623-1633, 2024. 10.1049\/cit2.12366","DOI":"10.1049\/cit2.12366"},{"key":"21","doi-asserted-by":"crossref","unstructured":"[21] R. Ding, Y. Wen, J. Liu, N. Dai, F. Meng, and M. Liu, \u201cIntegrating human parsing and pose network for human action recognition,\u201d Artificial Intelligence, L. Fang, J. Pei, G. Zhai, and R. Wang, ed., pp.182-194, Springer Nature Singapore, Singapore, 2024. 10.1007\/978-981-99-8850-1_15","DOI":"10.1007\/978-981-99-8850-1_15"},{"key":"22","doi-asserted-by":"crossref","unstructured":"[22] R. Hachiuma, F. Sato, and T. Sekii, \u201cUnified keypoint-based action recognition framework via structured keypoint pooling,\u201d Proc. IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.22962-22971, 2023. 10.1109\/cvpr52729.2023.02199","DOI":"10.1109\/CVPR52729.2023.02199"},{"key":"23","doi-asserted-by":"publisher","unstructured":"[23] H. Xu, Y. Gao, Z. Hui, J. Li, and X. Gao, \u201cLanguage knowledge-assisted representation learning for skeleton-based action recognition,\u201d arXiv preprint, arXiv:2305.12398, 2023. 10.1109\/tmm.2025.3543034","DOI":"10.1109\/TMM.2025.3543034"},{"key":"24","doi-asserted-by":"publisher","unstructured":"[24] P. Weinzaepfel and G. Rogez, \u201cMimetics: Towards understanding human actions out of context,\u201d Int. J. Comput. Vis., vol.129, no.5, pp.1675-1690, 2021. 10.1007\/s11263-021-01446-y","DOI":"10.1007\/s11263-021-01446-y"},{"key":"25","doi-asserted-by":"publisher","unstructured":"[25] B. Zhou, P. Wang, J. Wan, Y. Liang, and F. Wang, \u201cA unified multimodal de-and re-coupling framework for RGB-D motion recognition,\u201d IEEE Trans. Pattern Anal. Mach. 
Intell., vol.45, no.10, pp.11428-11442, 2023. 10.1109\/tpami.2023.3274783","DOI":"10.1109\/TPAMI.2023.3274783"},{"key":"26","doi-asserted-by":"publisher","unstructured":"[26] X. Bruce, Y. Liu, X. Zhang, S.h. Zhong, and K.C. Chan, \u201cMMNet: A model-based multimodal network for human action recognition in RGB-D videos,\u201d IEEE Trans. Pattern Anal. Mach. Intell., vol.45, no.3, pp.3522-3538, 2022. 10.1109\/tpami.2022.3177813","DOI":"10.1109\/TPAMI.2022.3177813"},{"key":"27","doi-asserted-by":"crossref","unstructured":"[27] S. Das, S. Sharma, R. Dai, F. Bremond, and M. Thonnat, \u201cVPN: Learning video-pose embedding for activities of daily living,\u201d European Conference on Computer Vision, pp.72-90, Springer, 2020. 10.1007\/978-3-030-58545-7_5","DOI":"10.1007\/978-3-030-58545-7_5"},{"key":"28","doi-asserted-by":"crossref","unstructured":"[28] D. Reilly and S. Das, \u201cJust add <i>\u03c0<\/i>! pose induced video transformers for understanding activities of daily living,\u201d Proc. IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.18340-18350, 2024. 10.1109\/cvpr52733.2024.01736","DOI":"10.1109\/CVPR52733.2024.01736"},{"key":"29","doi-asserted-by":"crossref","unstructured":"[29] B. Cheng, B. Xiao, J. Wang, H. Shi, T.S. Huang, and L. Zhang, \u201cHigherHRNet: Scale-aware representation learning for bottom-up human pose estimation,\u201d CVPR, 2020. 10.1109\/cvpr42600.2020.00543","DOI":"10.1109\/CVPR42600.2020.00543"},{"key":"30","doi-asserted-by":"crossref","unstructured":"[30] K. Sun, B. Xiao, D. Liu, and J. Wang, \u201cDeep high-resolution representation learning for human pose estimation,\u201d Proc. IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.5693-5703, 2019. 10.1109\/cvpr.2019.00584","DOI":"10.1109\/CVPR.2019.00584"},{"key":"31","doi-asserted-by":"publisher","unstructured":"[31] Q. Cheng, J. Cheng, Z. Ren, Q. Zhang, and J. 
Liu, \u201cMulti-scale spatial-temporal convolutional neural network for skeleton-based action recognition,\u201d Pattern Analysis and Applications, vol.26, no.3, pp.1303-1315, 2023. 10.1007\/s10044-023-01156-w","DOI":"10.1007\/s10044-023-01156-w"},{"key":"32","doi-asserted-by":"crossref","unstructured":"[32] Y. Zhou, X. Yan, Z.Q. Cheng, Y. Yan, Q. Dai, and X.S. Hua, \u201cBlockgcn: Redefining topology awareness for skeleton-based action recognition,\u201d Proc. IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2024. 10.1109\/cvpr52733.2024.00200","DOI":"10.1109\/CVPR52733.2024.00200"},{"key":"33","doi-asserted-by":"crossref","unstructured":"[33] A. Shahroudy, J. Liu, T.T. Ng, and G. Wang, \u201cNTU RGB+D: A large scale dataset for 3D human activity analysis,\u201d Proc. IEEE Conference on Computer Vision and Pattern Recognition, pp.1010-1019, 2016. 10.1109\/cvpr.2016.115","DOI":"10.1109\/CVPR.2016.115"},{"key":"34","doi-asserted-by":"publisher","unstructured":"[34] J. Liu, A. Shahroudy, M. Perez, G. Wang, L.Y. Duan, and A.C. Kot, \u201cNTU RGB+D 120: A large-scale benchmark for 3D human activity understanding,\u201d IEEE Trans. Pattern Anal. Mach. Intell., vol.42, no.10, pp.2684-2701, 2019. 10.1109\/tpami.2019.2916873","DOI":"10.1109\/TPAMI.2019.2916873"},{"key":"35","doi-asserted-by":"crossref","unstructured":"[35] T. Li, J. Liu, W. Zhang, Y. Ni, W. Wang, and Z. Li, \u201cUAV-human: A large benchmark for human behavior understanding with unmanned aerial vehicles,\u201d Proc. IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.16266-16275, 2021. 10.1109\/cvpr46437.2021.01600","DOI":"10.1109\/CVPR46437.2021.01600"},{"key":"36","doi-asserted-by":"crossref","unstructured":"[36] J. Carreira and A. Zisserman, \u201cQuo vadis, action recognition? A new model and the kinetics dataset,\u201d Proc. IEEE Conference on Computer Vision and Pattern Recognition, pp.6299-6308, 2017. 
10.1109\/cvpr.2017.502","DOI":"10.1109\/CVPR.2017.502"},{"key":"37","doi-asserted-by":"crossref","unstructured":"[37] L. Shi, Y. Zhang, J. Cheng, and H. Lu, \u201cSkeleton-based action recognition with directed graph neural networks,\u201d Proc. IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.7912-7921, 2019. 10.1109\/cvpr.2019.00810","DOI":"10.1109\/CVPR.2019.00810"},{"key":"38","doi-asserted-by":"crossref","unstructured":"[38] L. Shi, Y. Zhang, J. Cheng, and H. Lu, \u201cTwo-stream adaptive graph convolutional networks for skeleton-based action recognition,\u201d 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp.12018-12027, 2019. 10.1109\/cvpr.2019.01230","DOI":"10.1109\/CVPR.2019.01230"},{"key":"39","doi-asserted-by":"crossref","unstructured":"[39] T. Li, J. Liu, W. Zhang, and L. Duan, \u201cHARD-Net: Hardness-AwaRe discrimination network for 3D early activity prediction,\u201d Computer Vision-ECCV 2020: 16th European Conference, Glasgow, UK, Proceedings, Part XI 16, pp.420-436, Springer, 2020. 10.1007\/978-3-030-58621-8_25","DOI":"10.1007\/978-3-030-58621-8_25"},{"key":"40","doi-asserted-by":"crossref","unstructured":"[40] K. Cheng, Y. Zhang, X. He, W. Chen, J. Cheng, and H. Lu, \u201cSkeleton-based action recognition with shift graph convolutional network,\u201d Proc. IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.183-192, 2020. 
10.1109\/cvpr42600.2020.00026","DOI":"10.1109\/CVPR42600.2020.00026"}],"container-title":["IEICE Transactions on Fundamentals of Electronics, Communications and Computer Sciences"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/transfun\/E108.A\/12\/E108.A_2024EAP1162\/_pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T03:24:55Z","timestamp":1764991495000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/transfun\/E108.A\/12\/E108.A_2024EAP1162\/_article"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,1]]},"references-count":40,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2025]]}},"URL":"https:\/\/doi.org\/10.1587\/transfun.2024eap1162","relation":{},"ISSN":["0916-8508","1745-1337"],"issn-type":[{"value":"0916-8508","type":"print"},{"value":"1745-1337","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,12,1]]},"article-number":"2024EAP1162"}}