{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T17:40:04Z","timestamp":1755884404107,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":28,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,9,26]],"date-time":"2023-09-26T00:00:00Z","timestamp":1695686400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,9,26]]},"DOI":"10.1145\/3629296.3629351","type":"proceedings-article","created":{"date-parts":[[2024,1,15]],"date-time":"2024-01-15T18:06:26Z","timestamp":1705341986000},"page":"335-340","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["A teacher action recognition model based on pre-trained language and video model"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-4431-0568","authenticated-orcid":false,"given":"Sen","family":"Luo","sequence":"first","affiliation":[{"name":"Key Laboratory of Education Informatization for Nationalities, Ministry of Education, Yunnan Normal University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2693-2204","authenticated-orcid":false,"given":"Juxiang","family":"Zhou","sequence":"additional","affiliation":[{"name":"Key Laboratory of Education Informatization for Nationalities, Ministry of Education, Yunnan Normal University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7629-3223","authenticated-orcid":false,"given":"Xiaoyu","family":"Wen","sequence":"additional","affiliation":[{"name":"Key Laboratory of Education Informatization for Nationalities, Ministry of Education, Yunnan Normal University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9491-0624","authenticated-orcid":false,"given":"Hao","family":"Li","sequence":"additional","affiliation":[{"name":"Key Laboratory of Education Informatization for Nationalities, Ministry of Education, Yunnan Normal University, China"}]}],"member":"320","published-online":{"date-parts":[[2024,1,15]]},"reference":[{"issue":"06","key":"e_1_3_2_1_1_1","first-page":"107","volume":"32","author":"Xie W","year":"2022","unstructured":"Xie W, Tao Y, Gao J, YoWO Based Real-time Recognition of Classroom Learning Behavior[J]. Modern Educational Technology, 2022,32(06):107-114.","journal-title":"Modern Educational Technology"},{"key":"e_1_3_2_1_2_1","first-page":"105","article-title":"Behavior based on Human Skeleton and Deep Learning[J]","volume":"2020","author":"He X","unstructured":"He X, Yang F, Cheng Z, The Recognition of Student Classroom Behavior based on Human Skeleton and Deep Learning[J]. Modern Educational Technology, ,2020,30(11):105-112.","journal-title":"Modern Educational Technology"},{"key":"e_1_3_2_1_3_1","unstructured":"Devlin J. Chang M.-W. Lee K. and Toutanova K. Bert:Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 2018."},{"issue":"8","key":"e_1_3_2_1_4_1","first-page":"9","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"Radford A.","year":"2019","unstructured":"Radford, A., Wu, J., Child, R., Luan, D., Amodei, D., andSutskever, I. Language models are unsupervised multitask learners. OpenAI blog, 1(8):9, 2019.","journal-title":"OpenAI blog"},{"key":"e_1_3_2_1_5_1","volume-title":"Wang J","author":"Tong Z","year":"2022","unstructured":"Tong Z, Song Y, Wang J, Videomae: Masked autoencoders are data-efficient learners for self-supervised video pre-training[J]. arXiv preprint arXiv:2203.12602, 2022."},{"key":"e_1_3_2_1_6_1","unstructured":"Dosovitskiy Alexey \"An image is worth 16x16 words: Transformers for image recognition at scale.\"\u00a0arXiv preprint arXiv:2010.11929\u00a0(2020)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","unstructured":"Zhou K Yang J Loy C C Conditional prompt learning for vision-language models[C]\/\/Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2022: 16816-16825.","DOI":"10.1109\/CVPR52688.2022.01631"},{"key":"e_1_3_2_1_8_1","volume-title":"GPT understands, too[J]. arXiv preprint arXiv:2103.10385","author":"Liu X","year":"2021","unstructured":"Liu X, Zheng Y, Du Z, GPT understands, too[J]. arXiv preprint arXiv:2103.10385, 2021."},{"key":"e_1_3_2_1_9_1","volume-title":"Temporal 3d convnets: New architecture and transfer learning for video classification[J]. arXiv preprint arXiv:1711.08200","author":"Diba A","year":"2017","unstructured":"Diba A, Fayyaz M, Sharma V, Temporal 3d convnets: New architecture and transfer learning for video classification[J]. arXiv preprint arXiv:1711.08200, 2017."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"crossref","unstructured":"Tran D Bourdev L Fergus R Learning spatial-temporal features with 3d convolutional networks[C]\/\/Proceedings of the IEEE international conference on computer vision. 2015: 4489-4497.","DOI":"10.1109\/ICCV.2015.510"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"Carreira J Zisserman A. Quo vadis action recognition? a new model and the kinetics dataset[C]\/\/proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2017: 6299-6308.","DOI":"10.1109\/CVPR.2017.502"},{"key":"e_1_3_2_1_12_1","volume-title":"Two-stream convolutional networks for action recognition in videos[J]. Advances in neural information processing systems","author":"Simonyan K","year":"2014","unstructured":"Simonyan K, Zisserman A. Two-stream convolutional networks for action recognition in videos[J]. Advances in neural information processing systems, 2014, 27."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"crossref","unstructured":"Yan S Xiong X Arnab A Multiview transformers for video recognition[C]\/\/Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2022: 3333-3343.","DOI":"10.1109\/CVPR52688.2022.00333"},{"key":"e_1_3_2_1_14_1","volume-title":"Tel Aviv, Israel, October 23\u201327","author":"Lin Z","year":"2022","unstructured":"Lin Z, Geng S, Zhang R, Frozen clip models are efficient video learners[C]\/\/Computer Vision\u2013ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings, Part XXXV. Cham: Springer Nature Switzerland, 2022: 388-404."},{"volume-title":"PMLR","author":"Radford A","key":"e_1_3_2_1_15_1","unstructured":"Radford A, Kim J W, Hallacy C, Learning transferable visual models from natural language supervision[C]\/\/International conference on machine learning. PMLR, 2021: 8748-8763."},{"key":"e_1_3_2_1_16_1","volume-title":"Ucf101: A dataset of 101 human actions classes from videos in the wild.arXiv preprint arXiv:1212.0402","author":"Soomro K.","year":"2012","unstructured":"Soomro, K., Zamir, A. R., and Shah, M. Ucf101: A dataset of 101 human actions classes from videos in the wild.arXiv preprint arXiv:1212.0402, 2012."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126543"},{"key":"e_1_3_2_1_18_1","volume-title":"RandAugment: Practical data augmentation with no separate search[J]","author":"Cubuk E D","year":"2019","unstructured":"Cubuk E D , Zoph B , Shlens J , RandAugment: Practical data augmentation with no separate search[J]. 2019."},{"key":"e_1_3_2_1_19_1","volume-title":"The kinetics human action video dataset[J]. arXiv preprint arXiv:1705.06950","author":"Kay W","year":"2017","unstructured":"Kay W, Carreira J, Simonyan K, The kinetics human action video dataset[J]. arXiv preprint arXiv:1705.06950, 2017."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"crossref","unstructured":"Liu Z Ning J Cao Y Video swin transformer[C]\/\/Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 2022: 3202-3211.","DOI":"10.1109\/CVPR52688.2022.00320"},{"key":"e_1_3_2_1_21_1","volume-title":"Is space-time attention all you need for video understanding?[C]\/\/ICML","author":"Bertasius G","year":"2021","unstructured":"Bertasius G, Wang H, Torresani L. Is space-time attention all you need for video understanding?[C]\/\/ICML. 2021, 2(3): 4."},{"key":"e_1_3_2_1_22_1","volume-title":"Keeping your eye on the ball: Trajectory attention in video transformers[J]. Advances in neural information processing systems","author":"Patrick M","year":"2021","unstructured":"Patrick M, Campbell D, Asano Y, Keeping your eye on the ball: Trajectory attention in video transformers[J]. Advances in neural information processing systems, 2021, 34: 12493-12506."},{"key":"e_1_3_2_1_23_1","volume-title":"mixup: Beyond empirical risk minimization[J]. arXiv preprint arXiv:1710.09412","author":"Zhang H","year":"2017","unstructured":"Zhang H, Cisse M, Dauphin Y N, mixup: Beyond empirical risk minimization[J]. arXiv preprint arXiv:1710.09412, 2017."},{"key":"e_1_3_2_1_24_1","volume-title":"You only watch once: A unified cnn architecture for real-time spatiotemporal action localization[J]. arXiv preprint arXiv:1911.06644","author":"K\u00f6p\u00fckl\u00fc O","year":"2019","unstructured":"K\u00f6p\u00fckl\u00fc O, Wei X, Rigoll G. You only watch once: A unified cnn architecture for real-time spatiotemporal action localization[J]. arXiv preprint arXiv:1911.06644, 2019."},{"key":"e_1_3_2_1_25_1","volume-title":"Spatial temporal graph convolutional networks for skeleton-based action recognition[C]\/\/Proceedings of the AAAI conference on artificial intelligence","author":"Yan S","year":"2018","unstructured":"Yan S, Xiong Y, Lin D. Spatial temporal graph convolutional networks for skeleton-based action recognition[C]\/\/Proceedings of the AAAI conference on artificial intelligence. 2018, 32(1)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.3028207"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"crossref","unstructured":"Duan H Zhao Y Chen K Revisiting skeleton-based action recognition[C]\/\/Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2022: 2969-2978.","DOI":"10.1109\/CVPR52688.2022.00298"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.18178\/ijiet.2021.11.3.1500"}],"event":{"name":"ICETC 2023: The 15th International Conference on Education Technology and Computers","acronym":"ICETC 2023","location":"Barcelona Spain"},"container-title":["The 15th International Conference on Education Technology and Computers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3629296.3629351","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3629296.3629351","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T17:14:06Z","timestamp":1755882846000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3629296.3629351"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,9,26]]},"references-count":28,"alternative-id":["10.1145\/3629296.3629351","10.1145\/3629296"],"URL":"https:\/\/doi.org\/10.1145\/3629296.3629351","relation":{},"subject":[],"published":{"date-parts":[[2023,9,26]]},"assertion":[{"value":"2024-01-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}