{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:02:56Z","timestamp":1750309376219,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":35,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,6,24]],"date-time":"2024-06-24T00:00:00Z","timestamp":1719187200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["41971343"],"award-info":[{"award-number":["41971343"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Postgraduate Research & Practice Innovation Program of Jiangsu Province","award":["181200003023199"],"award-info":[{"award-number":["181200003023199"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,6,24]]},"DOI":"10.1145\/3686397.3686410","type":"proceedings-article","created":{"date-parts":[[2024,11,26]],"date-time":"2024-11-26T04:28:51Z","timestamp":1732595331000},"page":"71-78","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Keypoints-based multimodal network for robust human action recognition"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-8969-9130","authenticated-orcid":false,"given":"Zesheng","family":"Hu","sequence":"first","affiliation":[{"name":"School of Computer and Electronic Information\/ Artificial Intelligence, Nanjing Normal University, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7475-1910","authenticated-orcid":false,"given":"Genlin","family":"Ji","sequence":"additional","affiliation":[{"name":"School of Computer and Electronic Information\/ Artificial Intelligence, Nanjing Normal University, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2983-9921","authenticated-orcid":false,"given":"Jiaquan","family":"Gao","sequence":"additional","affiliation":[{"name":"School of Computer and Electronic Information\/ Artificial Intelligence, Nanjing Normal University, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1764-7084","authenticated-orcid":false,"given":"Bin","family":"Zhao","sequence":"additional","affiliation":[{"name":"School of Computer and Electronic Information\/ Artificial Intelligence, Nanjing Normal University, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9949-4818","authenticated-orcid":false,"given":"Xichen","family":"Yang","sequence":"additional","affiliation":[{"name":"School of Computer and Electronic Information\/ Artificial Intelligence, Nanjing Normal University, Nanjing, China"}]}],"member":"320","published-online":{"date-parts":[[2024,11,25]]},"reference":[{"key":"e_1_3_3_1_1_2","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01594-9"},{"key":"e_1_3_3_1_2_2","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2021.106970"},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"crossref","unstructured":"Xin W. Liu R. Liu Y. Chen Y. Yu W. & Miao Q. 2023. Transformer for Skeleton-based action recognition: A review of recent advances. Neurocomputing.","DOI":"10.1016\/j.neucom.2023.03.001"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.122314"},{"issue":"3","key":"e_1_3_3_1_5_2","first-page":"3522","article-title":"Mmnet: A model-based multimodal network for human action recognition in rgb-d videos","volume":"45","author":"Bruce X. B.","year":"2022","unstructured":"Bruce, X. B., Liu, Y., Zhang, X., Zhong, S. H., & Chan, K. C. 2022. Mmnet: A model-based multimodal network for human action recognition in rgb-d videos. IEEE Transactions on Pattern Analysis and Machine Intelligence, 45(3), 3522-3538.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"volume-title":"Human action recognition from various data modalities: A review","author":"Sun Z.","key":"e_1_3_3_1_6_2","unstructured":"Sun, Z., Ke, Q., Rahmani, H., Bennamoun, M., Wang, G., & Liu, J. 2022. Human action recognition from various data modalities: A review. IEEE transactions on pattern analysis and machine intelligence, 45(3), 3200-3225."},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3077512"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2023.01.015"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00298"},{"key":"e_1_3_3_1_10_2","first-page":"3199","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence (Vol. 35","author":"Bruce X. B.","year":"2021","unstructured":"Bruce, X. B., Liu, Y., & Chan, K. C. 2021. Multimodal fusion via teacher-student network for indoor action recognition. In Proceedings of the AAAI Conference on Artificial Intelligence (Vol. 35, No. 4, pp. 3199-3207)."},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12328"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00022"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01952"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00278"},{"key":"e_1_3_3_1_16_2","volume-title":"Proceedings, Part IX 16 (pp. 72-90)","author":"Das S.","year":"2020","unstructured":"Das, S., Sharma, S., Dai, R., Bremond, F., & Thonnat, M. 2020. Vpn: Learning video-pose embedding for activities of daily living. In Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part IX 16 (pp. 72-90). Springer International Publishing."},{"key":"e_1_3_3_1_17_2","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 13289-13299)","author":"Joze H. R. V.","year":"2020","unstructured":"Joze, H. R. V., Shaban, A., Iuzzolino, M. L., & Koishida, K. 2020. MMTM: Multimodal transfer module for CNN fusion. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 13289-13299)."},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","DOI":"10.1109\/WACV56688.2023.00333"},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3127885"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1007\/s10489-023-04978-7"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.115"},{"volume-title":"Ntu rgb+ d 120: A large-scale benchmark for 3d human activity understanding","author":"Liu J.","key":"e_1_3_3_1_22_2","unstructured":"Liu, J., Shahroudy, A., Perez, M., Wang, G., Duan, L. Y., & Kot, A. C. 2019. Ntu rgb+ d 120: A large-scale benchmark for 3d human activity understanding. IEEE transactions on pattern analysis and machine intelligence, 42(10), 2684-2701."},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01230"},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01311"},{"key":"e_1_3_3_1_25_2","unstructured":"Simonyan K. & Zisserman A. 2014. Two-stream convolutional networks for action recognition in videos. Advances in neural information processing systems 27."},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00889"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00630"},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-021-01467-7"},{"key":"e_1_3_3_1_29_2","unstructured":"Vaswani A. Shazeer N. Parmar N. Uszkoreit J. Jones L. Gomez A. N. ... & Polosukhin I. 2017. Attention is all you need. Advances in neural information processing systems 30."},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i2.20013"},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01367"},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-022-12091-z"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2018.12.050"},{"key":"e_1_3_3_1_34_2","volume-title":"International conference on machine learning (pp. 6105-6114)","author":"Tan M.","year":"2019","unstructured":"Tan, M., & Le, Q. 2019. Efficientnet: Rethinking model scaling for convolutional neural networks. In International conference on machine learning (pp. 6105-6114). PMLR."},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"}],"event":{"name":"ICISDM 2024: 2024 the 8th International Conference on Information System and Data Mining","acronym":"ICISDM 2024","location":"Los Angeles CA USA"},"container-title":["Proceedings of the 2024 8th International Conference on Information System and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3686397.3686410","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3686397.3686410","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:06:23Z","timestamp":1750291583000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3686397.3686410"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,24]]},"references-count":35,"alternative-id":["10.1145\/3686397.3686410","10.1145\/3686397"],"URL":"https:\/\/doi.org\/10.1145\/3686397.3686410","relation":{},"subject":[],"published":{"date-parts":[[2024,6,24]]},"assertion":[{"value":"2024-11-25","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}