{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T06:20:18Z","timestamp":1778048418620,"version":"3.51.4"},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T00:00:00Z","timestamp":1778025600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T00:00:00Z","timestamp":1778025600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2026,8]]},"DOI":"10.1007\/s00530-026-02369-9","type":"journal-article","created":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T06:01:24Z","timestamp":1778047284000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Student action detection in class with individual temporal attention and co-occurrence awareness"],"prefix":"10.1007","volume":"32","author":[{"given":"Guoying","family":"Wang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nannan","family":"Feng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lufeng","family":"Mo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaomei","family":"Yi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Peng","family":"Wu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,5,6]]},"reference":[{"key":"2369_CR1","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Wang, G.: Research on application of intelligent analysis in monitoring of classroom teaching. In: 2021 3rd International Conference on Advances in Computer Technology, Information Science and Communication (CTISC), pp. 253\u2013257 (2021). IEEE","DOI":"10.1109\/CTISC52352.2021.00053"},{"issue":"2","key":"2369_CR2","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1007\/s11263-005-1838-7","volume":"64","author":"I Laptev","year":"2005","unstructured":"Laptev, I.: On space-time interest points. Int. J. Comput. Vision 64(2), 107\u2013123 (2005)","journal-title":"Int. J. Comput. Vision"},{"key":"2369_CR3","doi-asserted-by":"crossref","unstructured":"Klaser, A., Marsza\u0142ek, M., Schmid, C.: A spatio-temporal descriptor based on 3d-gradients. In: BMVC 2008-19th British Machine Vision Conference, pp. 275\u20131 (2008). British Machine Vision Association","DOI":"10.5244\/C.22.99"},{"key":"2369_CR4","doi-asserted-by":"crossref","unstructured":"Wang, L., Qiao, Y., Tang, X.: Action recognition with trajectory-pooled deep-convolutional descriptors. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4305\u20134314 (2015)","DOI":"10.1109\/CVPR.2015.7299059"},{"issue":"1","key":"2369_CR5","doi-asserted-by":"publisher","first-page":"60","DOI":"10.1007\/s11263-012-0594-8","volume":"103","author":"H Wang","year":"2013","unstructured":"Wang, H., Kl\u00e4ser, A., Schmid, C., Liu, C.-L.: Dense trajectories and motion boundary descriptors for action recognition. Int. J. Comput. Vision 103(1), 60\u201379 (2013)","journal-title":"Int. J. Comput. Vision"},{"key":"2369_CR6","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R.: Mask r-cnn. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2961\u20132969 (2017)","DOI":"10.1109\/ICCV.2017.322"},{"key":"2369_CR7","doi-asserted-by":"crossref","unstructured":"Chen, S., Sun, P., Xie, E., Ge, C., Wu, J., Ma, L., Shen, J., Luo, P.: Watch only once: An end-to-end video action detection framework. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 8178\u20138187 (2021)","DOI":"10.1109\/ICCV48922.2021.00807"},{"key":"2369_CR8","doi-asserted-by":"crossref","unstructured":"Wu, T., Cao, M., Gao, Z., Wu, G., Wang, L.: Stmixer: A one-stage sparse action detector. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14720\u201314729 (2023)","DOI":"10.1109\/CVPR52729.2023.01414"},{"key":"2369_CR9","doi-asserted-by":"crossref","unstructured":"Tran, D., Bourdev, L., Fergus, R., Torresani, L., Paluri, M.: Learning spatiotemporal features with 3d convolutional networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4489\u20134497 (2015)","DOI":"10.1109\/ICCV.2015.510"},{"key":"2369_CR10","doi-asserted-by":"crossref","unstructured":"Donahue, J., Anne\u00a0Hendricks, L., Guadarrama, S., Rohrbach, M., Venugopalan, S., Saenko, K., Darrell, T.: Long-term recurrent convolutional networks for visual recognition and description. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2625\u20132634 (2015)","DOI":"10.1109\/CVPR.2015.7298878"},{"key":"2369_CR11","unstructured":"Liu, C., Kong, Y., Wu, X., Jia, Y.: Action recognition with discriminative mid-level features. In: Proceedings of the 21st International Conference on Pattern Recognition (ICPR2012), pp. 3366\u20133369 (2012). IEEE"},{"key":"2369_CR12","doi-asserted-by":"crossref","unstructured":"Liu, C., Gao, Y., Li, Z., Du, C., Liu, F., Shi, X.: Action prediction network with auxiliary observation ratio regression. In: 2021 IEEE International Conference on Multimedia and Expo (ICME), pp. 1\u20136 (2021). IEEE Computer Society","DOI":"10.1109\/ICME51207.2021.9428266"},{"key":"2369_CR13","doi-asserted-by":"crossref","unstructured":"Wu, Z., Jiang, Y.-G., Wang, J., Pu, J., Xue, X.: Exploring inter-feature and inter-class relationships with deep neural networks for video classification. In: Proceedings of the 22nd ACM International Conference on Multimedia, pp. 167\u2013176 (2014)","DOI":"10.1145\/2647868.2654931"},{"issue":"1","key":"2369_CR14","doi-asserted-by":"publisher","first-page":"242","DOI":"10.1109\/TCDS.2022.3157813","volume":"15","author":"N Nigam","year":"2022","unstructured":"Nigam, N., Dutta, T., Verma, D.: Fall-perceived action recognition of persons with neurological disorders using semantic supervision. IEEE Trans. Cogn. and Dev. Syst. 15(1), 242\u2013251 (2022)","journal-title":"IEEE Trans. Cogn. and Dev. Syst."},{"issue":"3","key":"2369_CR15","doi-asserted-by":"publisher","first-page":"976","DOI":"10.1109\/TCSVT.2021.3070688","volume":"32","author":"N Nigam","year":"2021","unstructured":"Nigam, N., Dutta, T., Gupta, H.P.: Factornet: Holistic actor, object, and scene factorization for action recognition in videos. IEEE Trans. Circuits Syst. Video Technol. 32(3), 976\u2013991 (2021)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"2369_CR16","doi-asserted-by":"crossref","unstructured":"Tan, W., Yao, Q., Liu, J.: Overlooked video classification in weakly supervised video anomaly detection. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 202\u2013210 (2024)","DOI":"10.1109\/WACVW60836.2024.00029"},{"key":"2369_CR17","doi-asserted-by":"crossref","unstructured":"Arnab, A., Dehghani, M., Heigold, G., Sun, C., Lu\u010di\u0107, M., Schmid, C.: Vivit: A video vision transformer. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6836\u20136846 (2021)","DOI":"10.1109\/ICCV48922.2021.00676"},{"key":"2369_CR18","unstructured":"Bertasius, G., Wang, H., Torresani, L.: Is space-time attention all you need for video understanding? In: Icml, vol. 2, p. 4 (2021)"},{"key":"2369_CR19","doi-asserted-by":"crossref","unstructured":"Fan, H., Xiong, B., Mangalam, K., Li, Y., Yan, Z., Malik, J., Feichtenhofer, C.: Multiscale vision transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6824\u20136835 (2021)","DOI":"10.1109\/ICCV48922.2021.00675"},{"key":"2369_CR20","doi-asserted-by":"crossref","unstructured":"Liu, Z., Ning, J., Cao, Y., Wei, Y., Zhang, Z., Lin, S., Hu, H.: Video swin transformer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3202\u20133211 (2022)","DOI":"10.1109\/CVPR52688.2022.00320"},{"key":"2369_CR21","doi-asserted-by":"crossref","unstructured":"Gu, C., Sun, C., Ross, D.A., Vondrick, C., Pantofaru, C., Li, Y., Vijayanarasimhan, S., Toderici, G., Ricco, S., Sukthankar, R., et al.: Ava: A video dataset of spatio-temporally localized atomic visual actions. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6047\u20136056 (2018)","DOI":"10.1109\/CVPR.2018.00633"},{"key":"2369_CR22","unstructured":"Girdhar, R., Carreira, J., Doersch, C., Zisserman, A.: A better baseline for ava. arXiv preprint arXiv:1807.10066 (2018)"},{"key":"2369_CR23","doi-asserted-by":"crossref","unstructured":"Hou, R., Chen, C., Shah, M.: Tube convolutional neural network (t-cnn) for action detection in videos. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 5822\u20135831 (2017)","DOI":"10.1109\/ICCV.2017.620"},{"key":"2369_CR24","doi-asserted-by":"crossref","unstructured":"Yuan, X., Cheng, G., Yan, K., Zeng, Q., Han, J.: Small object detection via coarse-to-fine proposal generation and imitation learning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6317\u20136327 (2023)","DOI":"10.1109\/ICCV51070.2023.00581"},{"issue":"6","key":"2369_CR25","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","volume":"39","author":"S Ren","year":"2016","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster r-cnn: Towards real-time object detection with region proposal networks. IEEE Trans. Pattern Anal. Mach. Intell. 39(6), 1137\u20131149 (2016)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"2369_CR26","doi-asserted-by":"crossref","unstructured":"Murray, N., Marchesotti, L., Perronnin, F.: Ava: A large-scale database for aesthetic visual analysis. In: 2012 IEEE Conference on Computer Vision and Pattern Recognition, pp. 2408\u20132415 (2012). IEEE","DOI":"10.1109\/CVPR.2012.6247954"},{"key":"2369_CR27","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Fan, H., Malik, J., He, K.: Slowfast networks for video recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6202\u20136211 (2019)","DOI":"10.1109\/ICCV.2019.00630"},{"key":"2369_CR28","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? a new model and the kinetics dataset. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6299\u20136308 (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"2369_CR29","doi-asserted-by":"crossref","unstructured":"Pan, J., Chen, S., Shou, M.Z., Liu, Y., Shao, J., Li, H.: Actor-context-actor relation network for spatio-temporal action localization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 464\u2013474 (2021)","DOI":"10.1109\/CVPR46437.2021.00053"},{"key":"2369_CR30","doi-asserted-by":"crossref","unstructured":"Tang, J., Xia, J., Mu, X., Pang, B., Lu, C.: Asynchronous interaction aggregation for action detection. In: European Conference on Computer Vision, pp. 71\u201387 (2020). Springer","DOI":"10.1007\/978-3-030-58555-6_5"},{"key":"2369_CR31","doi-asserted-by":"crossref","unstructured":"Zhao, J., Zhang, Y., Li, X., Chen, H., Shuai, B., Xu, M., Liu, C., Kundu, K., Xiong, Y., Modolo, D., et al.: Tuber: Tubelet transformer for video action detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13598\u201313607 (2022)","DOI":"10.1109\/CVPR52688.2022.01323"},{"key":"2369_CR32","doi-asserted-by":"crossref","unstructured":"Shi, D., Zhong, Y., Cao, Q., Ma, L., Li, J., Tao, D.: Tridet: Temporal action detection with relative boundary modeling. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18857\u201318866 (2023)","DOI":"10.1109\/CVPR52729.2023.01808"},{"key":"2369_CR33","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2023.104726","volume":"136","author":"Y Li","year":"2023","unstructured":"Li, Y., Qi, X., Saudagar, A.K.J., Badshah, A.M., Muhammad, K., Liu, S.: Student behavior recognition for interaction detection in the classroom environment. Image Vis. Comput. 136, 104726 (2023)","journal-title":"Image Vis. Comput."},{"key":"2369_CR34","doi-asserted-by":"crossref","unstructured":"Fu, R., Wu, T., Luo, Z., Duan, F., Qiao, X., Guo, P.: Learning behavior analysis in classroom based on deep learning. In: 2019 Tenth International Conference on Intelligent Control and Information Processing (ICICIP), pp. 206\u2013212 (2019). IEEE","DOI":"10.1109\/ICICIP47338.2019.9012177"},{"issue":"1","key":"2369_CR35","doi-asserted-by":"publisher","first-page":"140","DOI":"10.1186\/s13638-021-02015-0","volume":"2021","author":"Y Zhang","year":"2021","unstructured":"Zhang, Y., Zhu, T., Ning, H., Liu, Z.: Classroom student posture recognition based on an improved high-resolution network. EURASIP J. Wirel. Commun. Netw. 2021(1), 140 (2021)","journal-title":"EURASIP J. Wirel. Commun. Netw."},{"issue":"1","key":"2369_CR36","first-page":"7049458","volume":"2022","author":"J Zhou","year":"2022","unstructured":"Zhou, J., Ran, F., Li, G., Peng, J., Li, K., Wang, Z.: Classroom learning status assessment based on deep learning. Math. Probl. Eng. 2022(1), 7049458 (2022)","journal-title":"Math. Probl. Eng."},{"issue":"2","key":"2369_CR37","first-page":"2421","volume":"40","author":"P Chonggao","year":"2021","unstructured":"Chonggao, P.: Simulation of student classroom behavior recognition based on cluster analysis and random forest algorithm. J. Intell. & Fuz. Syst. 40(2), 2421\u20132431 (2021)","journal-title":"J. Intell. & Fuz. Syst."},{"issue":"1","key":"2369_CR38","first-page":"3819409","volume":"2022","author":"Y Ding","year":"2022","unstructured":"Ding, Y., Bao, K., Zhang, J.: An intelligent system for detecting abnormal behavior in students based on the human skeleton and deep learning. Comput. Intell. Neurosci. 2022(1), 3819409 (2022)","journal-title":"Comput. Intell. Neurosci."},{"issue":"3","key":"2369_CR39","doi-asserted-by":"publisher","first-page":"451","DOI":"10.1049\/iet-ipr.2018.5905","volume":"13","author":"L Tang","year":"2019","unstructured":"Tang, L., Gao, C., Chen, X., Zhao, Y.: Pose detection in complex classroom environment based on improved faster r-cnn. IET Image Proc. 13(3), 451\u2013457 (2019)","journal-title":"IET Image Proc."},{"issue":"10","key":"2369_CR40","doi-asserted-by":"publisher","first-page":"12861","DOI":"10.1007\/s11227-022-04402-w","volume":"78","author":"S Liu","year":"2022","unstructured":"Liu, S., Zhang, J., Su, W.: An improved method of identifying learner\u2019s behaviors based on deep learning. J. Supercomput. 78(10), 12861\u201312872 (2022)","journal-title":"J. Supercomput."},{"key":"2369_CR41","doi-asserted-by":"crossref","unstructured":"Xie, Y., Zhang, S., Liu, Y.: Abnormal behavior recognition in classroom pose estimation of college students based on spatiotemporal representation learning. Traitement du Signal 38(1) (2021)","DOI":"10.18280\/ts.380109"},{"issue":"18","key":"2369_CR42","doi-asserted-by":"publisher","first-page":"10426","DOI":"10.3390\/app131810426","volume":"13","author":"Z Shou","year":"2023","unstructured":"Shou, Z., Yan, M., Wen, H., Liu, J., Mo, J., Zhang, H.: Research on students\u2019 action behavior recognition method based on classroom time-series images. Appl. Sci. 13(18), 10426 (2023)","journal-title":"Appl. Sci."},{"key":"2369_CR43","doi-asserted-by":"crossref","unstructured":"Chen, L., Tong, Z., Song, Y., Wu, G., Wang, L.: Efficient video action detection with token dropout and context refinement. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10388\u201310399 (2023)","DOI":"10.1109\/ICCV51070.2023.00953"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-026-02369-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-026-02369-9","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-026-02369-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T06:01:52Z","timestamp":1778047312000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-026-02369-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5,6]]},"references-count":43,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2026,8]]}},"alternative-id":["2369"],"URL":"https:\/\/doi.org\/10.1007\/s00530-026-02369-9","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,5,6]]},"assertion":[{"value":"3 October 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 March 2026","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 May 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Ethics approval was not required for this research.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}},{"value":"All authors have read and agreed to publish this work.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}}],"article-number":"291"}}