{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,24]],"date-time":"2026-01-24T19:45:33Z","timestamp":1769283933695,"version":"3.49.0"},"reference-count":29,"publisher":"MDPI AG","issue":"16","license":[{"start":{"date-parts":[[2024,8,20]],"date-time":"2024-08-20T00:00:00Z","timestamp":1724112000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62177012"],"award-info":[{"award-number":["62177012"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62267003"],"award-info":[{"award-number":["62267003"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["2024GXNSFDA010048"],"award-info":[{"award-number":["2024GXNSFDA010048"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["GXKL06240107"],"award-info":[{"award-number":["GXKL06240107"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["YCBZ2024160"],"award-info":[{"award-number":["YCBZ2024160"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["2023KY1870"],"award-info":[{"award-number":["2023KY1870"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100012547","name":"Guangxi Natural Science Foundation","doi-asserted-by":"publisher","award":["62177012"],"award-info":[{"award-number":["62177012"]}],"id":[{"id":"10.13039\/100012547","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100012547","name":"Guangxi Natural Science Foundation","doi-asserted-by":"publisher","award":["62267003"],"award-info":[{"award-number":["62267003"]}],"id":[{"id":"10.13039\/100012547","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100012547","name":"Guangxi Natural Science Foundation","doi-asserted-by":"publisher","award":["2024GXNSFDA010048"],"award-info":[{"award-number":["2024GXNSFDA010048"]}],"id":[{"id":"10.13039\/100012547","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100012547","name":"Guangxi Natural Science Foundation","doi-asserted-by":"publisher","award":["GXKL06240107"],"award-info":[{"award-number":["GXKL06240107"]}],"id":[{"id":"10.13039\/100012547","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100012547","name":"Guangxi Natural Science Foundation","doi-asserted-by":"publisher","award":["YCBZ2024160"],"award-info":[{"award-number":["YCBZ2024160"]}],"id":[{"id":"10.13039\/100012547","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100012547","name":"Guangxi Natural Science Foundation","doi-asserted-by":"publisher","award":["2023KY1870"],"award-info":[{"award-number":["2023KY1870"]}],"id":[{"id":"10.13039\/100012547","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Project of Guangxi Wireless Broadband Communication and Signal Processing Key Laboratory","award":["62177012"],"award-info":[{"award-number":["62177012"]}]},{"name":"Project of Guangxi Wireless Broadband Communication and Signal Processing Key Laboratory","award":["62267003"],"award-info":[{"award-number":["62267003"]}]},{"name":"Project of Guangxi Wireless Broadband Communication and Signal Processing Key Laboratory","award":["2024GXNSFDA010048"],"award-info":[{"award-number":["2024GXNSFDA010048"]}]},{"name":"Project of Guangxi Wireless Broadband Communication and Signal Processing Key Laboratory","award":["GXKL06240107"],"award-info":[{"award-number":["GXKL06240107"]}]},{"name":"Project of Guangxi Wireless Broadband Communication and Signal Processing Key Laboratory","award":["YCBZ2024160"],"award-info":[{"award-number":["YCBZ2024160"]}]},{"name":"Project of Guangxi Wireless Broadband Communication and Signal Processing Key Laboratory","award":["2023KY1870"],"award-info":[{"award-number":["2023KY1870"]}]},{"name":"Innovation Project of Guangxi Graduate Education","award":["62177012"],"award-info":[{"award-number":["62177012"]}]},{"name":"Innovation Project of Guangxi Graduate Education","award":["62267003"],"award-info":[{"award-number":["62267003"]}]},{"name":"Innovation Project of Guangxi Graduate Education","award":["2024GXNSFDA010048"],"award-info":[{"award-number":["2024GXNSFDA010048"]}]},{"name":"Innovation Project of Guangxi Graduate Education","award":["GXKL06240107"],"award-info":[{"award-number":["GXKL06240107"]}]},{"name":"Innovation Project of Guangxi Graduate Education","award":["YCBZ2024160"],"award-info":[{"award-number":["YCBZ2024160"]}]},{"name":"Innovation Project of Guangxi Graduate Education","award":["2023KY1870"],"award-info":[{"award-number":["2023KY1870"]}]},{"name":"Project for Improving the Basic Scientific Research Abilities of Young and Middle-aged Teachers in Guangxi Colleges and Universities","award":["62177012"],"award-info":[{"award-number":["62177012"]}]},{"name":"Project for Improving the Basic Scientific Research Abilities of Young and Middle-aged Teachers in Guangxi Colleges and Universities","award":["62267003"],"award-info":[{"award-number":["62267003"]}]},{"name":"Project for Improving the Basic Scientific Research Abilities of Young and Middle-aged Teachers in Guangxi Colleges and Universities","award":["2024GXNSFDA010048"],"award-info":[{"award-number":["2024GXNSFDA010048"]}]},{"name":"Project for Improving the Basic Scientific Research Abilities of Young and Middle-aged Teachers in Guangxi Colleges and Universities","award":["GXKL06240107"],"award-info":[{"award-number":["GXKL06240107"]}]},{"name":"Project for Improving the Basic Scientific Research Abilities of Young and Middle-aged Teachers in Guangxi Colleges and Universities","award":["YCBZ2024160"],"award-info":[{"award-number":["YCBZ2024160"]}]},{"name":"Project for Improving the Basic Scientific Research Abilities of Young and Middle-aged Teachers in Guangxi Colleges and Universities","award":["2023KY1870"],"award-info":[{"award-number":["2023KY1870"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Sensors"],"abstract":"<jats:p>The precise recognition of entire classroom meta-actions is a crucial challenge for the tailored adaptive interpretation of student behavior, given the intricacy of these actions. This paper proposes a Dynamic Position Embedding-based Model for Student Classroom Complete Meta-Action Recognition (DPE-SAR) based on the Video Swin Transformer. The model utilizes a dynamic positional embedding technique to perform conditional positional encoding. Additionally, it incorporates a deep convolutional network to improve the parsing ability of the spatial structure of meta-actions. The full attention mechanism of ViT3D is used to extract the potential spatial features of actions and capture the global spatial\u2013temporal information of meta-actions. The proposed model exhibits exceptional performance compared to baseline models in action recognition as observed in evaluations on public datasets and smart classroom meta-action recognition datasets. The experimental results confirm the superiority of the model in meta-action recognition.<\/jats:p>","DOI":"10.3390\/s24165371","type":"journal-article","created":{"date-parts":[[2024,8,20]],"date-time":"2024-08-20T06:07:35Z","timestamp":1724134055000},"page":"5371","update-policy":"https:\/\/doi.org\/10.3390\/mdpi_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["A Dynamic Position Embedding-Based Model for Student Classroom Complete Meta-Action Recognition"],"prefix":"10.3390","volume":"24","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2429-5782","authenticated-orcid":false,"given":"Zhaoyu","family":"Shou","sequence":"first","affiliation":[{"name":"School of Information and Communication, Guilin University of Electronic Technology, Guilin 541004, China"},{"name":"Guangxi Wireless Broadband Communication and Signal Processing Key Laboratory, Guilin University of Electronic Technology, Guilin 541004, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaohu","family":"Yuan","sequence":"additional","affiliation":[{"name":"School of Information and Communication, Guilin University of Electronic Technology, Guilin 541004, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dongxu","family":"Li","sequence":"additional","affiliation":[{"name":"School of Information and Communication, Guilin University of Electronic Technology, Guilin 541004, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1729-1284","authenticated-orcid":false,"given":"Jianwen","family":"Mo","sequence":"additional","affiliation":[{"name":"School of Information and Communication, Guilin University of Electronic Technology, Guilin 541004, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Huibing","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Computer and Information Security, Guilin University of Electronic Technology, Guilin 541004, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7907-2853","authenticated-orcid":false,"given":"Jingwei","family":"Zhang","sequence":"additional","affiliation":[{"name":"Guangxi Key Laboratory of Trusted Software, Guilin University of Electronic Technology, Guilin 541004, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ziyong","family":"Wu","sequence":"additional","affiliation":[{"name":"Guangxi Key Laboratory of Trusted Software, Guilin University of Electronic Technology, Guilin 541004, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"1968","published-online":{"date-parts":[[2024,8,20]]},"reference":[{"key":"ref_1","doi-asserted-by":"crossref","unstructured":"Shou, Z., Yan, M., Wen, H., Liu, J., Mo, J., and Zhang, H. (2023). Research on Students\u2019 Action Behavior Recognition Method Based on Classroom Time-Series Images. Appl. Sci., 13.","DOI":"10.3390\/app131810426"},{"key":"ref_2","doi-asserted-by":"crossref","unstructured":"Lin, F.C., Ngo, H.H., Dow, C.R., Lam, K.H., and Le, H.L. (2021). Student behavior recognition system for the classroom environment based on skeleton pose estimation and person detection. Sensors, 21.","DOI":"10.3390\/s21165314"},{"key":"ref_3","doi-asserted-by":"crossref","unstructured":"Chen, Z., Huang, W., Liu, H., Wang, Z., Wen, Y., and Wang, S. (2024). ST-TGR: Spatio-Temporal Representation Learning for Skeleton-Based Teaching Gesture Recognition. Sensors, 24.","DOI":"10.3390\/s24082589"},{"key":"ref_4","doi-asserted-by":"crossref","first-page":"820","DOI":"10.1016\/j.future.2021.06.045","article-title":"Human action recognition using attention based LSTM network with dilated CNN features","volume":"125","author":"Muhammad","year":"2021","journal-title":"Future Gener. Comput. Syst."},{"key":"ref_5","doi-asserted-by":"crossref","unstructured":"Liu, Z., Ning, J., Cao, Y., Wei, Y., Zhang, Z., Lin, S., and Hu, H. (2022, January 18\u201324). Video Swin Transformer. Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, New Orleans, LA, USA.","DOI":"10.1109\/CVPR52688.2022.00320"},{"key":"ref_6","doi-asserted-by":"crossref","unstructured":"Chen, Z., Xie, L., Niu, J., Liu, X., Wei, L., and Tian, Q. (2021, January 10\u201317). Visformer: The vision-friendly transformer. Proceedings of the IEEE\/CVF International Conference on Computer Vision, Montreal, QC, Canada.","DOI":"10.1109\/ICCV48922.2021.00063"},{"key":"ref_7","doi-asserted-by":"crossref","first-page":"33179","DOI":"10.1007\/s11042-021-11403-z","article-title":"3D convolutional networks with multi-layer-pooling selection fusion for video classification","volume":"80","author":"Hu","year":"2021","journal-title":"Multimed. Tools Appl."},{"key":"ref_8","doi-asserted-by":"crossref","unstructured":"Huo, H., and Li, B. (2024). MgMViT: Multi-Granularity and Multi-Scale Vision Transformer for Efficient Action Recognition. Electronics, 13.","DOI":"10.3390\/electronics13050948"},{"key":"ref_9","doi-asserted-by":"crossref","first-page":"286","DOI":"10.1007\/s42979-020-00293-x","article-title":"Human action prediction with 3D-CNN","volume":"1","author":"Alfaifi","year":"2020","journal-title":"SN Comput. Sci."},{"key":"ref_10","doi-asserted-by":"crossref","first-page":"2990","DOI":"10.1109\/TMM.2020.2965434","article-title":"Spatio-temporal attention networks for action recognition and detection","volume":"22","author":"Li","year":"2020","journal-title":"IEEE Trans. Multimed."},{"key":"ref_11","doi-asserted-by":"crossref","first-page":"1377","DOI":"10.1007\/s40747-022-00858-8","article-title":"Parallel temporal feature selection based on improved attention mechanism for dynamic gesture recognition","volume":"9","author":"Li","year":"2023","journal-title":"Complex Intell. Syst."},{"key":"ref_12","doi-asserted-by":"crossref","first-page":"2496","DOI":"10.1109\/TNNLS.2022.3190367","article-title":"An effective video transformer with synchronized spatiotemporal and spatial self-attention for action recognition","volume":"35","author":"Alfasly","year":"2022","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"ref_13","doi-asserted-by":"crossref","first-page":"108487","DOI":"10.1016\/j.patcog.2021.108487","article-title":"Action transformer: A self-attention model for short-time pose-based human action recognition","volume":"124","author":"Mazzia","year":"2022","journal-title":"Pattern Recognit."},{"key":"ref_14","doi-asserted-by":"crossref","first-page":"72227","DOI":"10.1109\/ACCESS.2023.3293813","article-title":"Vit-ret: Vision and recurrent transformer neural networks for human activity recognition in videos","volume":"11","author":"Wensel","year":"2023","journal-title":"IEEE Access"},{"key":"ref_15","first-page":"16664","article-title":"Adaptformer: Adapting vision transformers for scalable visual recognition","volume":"35","author":"Chen","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref_16","unstructured":"Bertasius, G., Wang, H., and Torresani, L. Is space-time attention all you need for video understanding? In Proceedings of the International Conference on Machine Learning (ICML), Virtual, 18\u201324 July 2021."},{"key":"ref_17","doi-asserted-by":"crossref","first-page":"11109","DOI":"10.1007\/s11063-023-11367-1","article-title":"Swin-fusion: Swin-transformer with feature fusion for human action recognition","volume":"55","author":"Chen","year":"2023","journal-title":"Neural Process. Lett."},{"key":"ref_18","first-page":"19594","article-title":"Space-time mixing attention for video transformer","volume":"34","author":"Bulat","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref_19","doi-asserted-by":"crossref","unstructured":"Liu, G., Zhang, C., Xu, Q., Cheng, R., Song, Y., Yuan, X., and Sun, J. (2020). I3d-shufflenet based human action recognition. Algorithms, 13.","DOI":"10.3390\/a13110301"},{"key":"ref_20","doi-asserted-by":"crossref","first-page":"109905","DOI":"10.1016\/j.patcog.2023.109905","article-title":"Relative-position embedding based spatially and temporally decoupled Transformer for action recognition","volume":"145","author":"Ma","year":"2024","journal-title":"Pattern Recognit."},{"key":"ref_21","doi-asserted-by":"crossref","first-page":"1484","DOI":"10.1007\/s11263-020-01409-9","article-title":"Spatial\u2013temporal relation reasoning for action prediction in videos","volume":"129","author":"Wu","year":"2021","journal-title":"Int. J. Comput. Vis."},{"key":"ref_22","doi-asserted-by":"crossref","first-page":"107101","DOI":"10.1016\/j.asoc.2021.107101","article-title":"Att-Net: Enhanced emotion recognition system using lightweight self-attention module","volume":"102","author":"Kwon","year":"2021","journal-title":"Appl. Soft Comput."},{"key":"ref_23","doi-asserted-by":"crossref","first-page":"668","DOI":"10.1109\/TMM.2021.3057503","article-title":"Temporal cross-layer correlation mining for action recognition","volume":"24","author":"Zhu","year":"2021","journal-title":"IEEE Trans. Multimed."},{"key":"ref_24","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, L., and Polosukhin, I. (2017). Attention is all you need. Adv. Neural Inf. Process. Syst., 30."},{"key":"ref_25","doi-asserted-by":"crossref","first-page":"168","DOI":"10.1007\/s44196-023-00345-z","article-title":"Design of a modified transformer architecture based on relative position coding","volume":"16","author":"Zheng","year":"2023","journal-title":"Int. J. Comput. Intell. Syst."},{"key":"ref_26","doi-asserted-by":"crossref","first-page":"733","DOI":"10.1162\/coli_a_00445","article-title":"Position information in transformers: An overview","volume":"48","author":"Dufter","year":"2022","journal-title":"Comput. Linguist."},{"key":"ref_27","doi-asserted-by":"crossref","first-page":"164","DOI":"10.1016\/j.neucom.2023.03.001","article-title":"Transformer for skeleton-based action recognition: A review of recent advances","volume":"537","author":"Xin","year":"2023","journal-title":"Neurocomputing"},{"key":"ref_28","unstructured":"Chu, X., Tian, Z., Zhang, B., Wang, X., and Shen, C. (2021). Conditional positional encodings for vision transformers. arXiv."},{"key":"ref_29","doi-asserted-by":"crossref","first-page":"12581","DOI":"10.1109\/TPAMI.2023.3282631","article-title":"UniFormer: Unifying Convolution and Self-Attention for Visual Recognition","volume":"45","author":"Li","year":"2023","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."}],"container-title":["Sensors"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.mdpi.com\/1424-8220\/24\/16\/5371\/pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,10]],"date-time":"2025-10-10T15:39:39Z","timestamp":1760110779000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.mdpi.com\/1424-8220\/24\/16\/5371"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,20]]},"references-count":29,"journal-issue":{"issue":"16","published-online":{"date-parts":[[2024,8]]}},"alternative-id":["s24165371"],"URL":"https:\/\/doi.org\/10.3390\/s24165371","relation":{},"ISSN":["1424-8220"],"issn-type":[{"value":"1424-8220","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,8,20]]}}}