{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T12:14:51Z","timestamp":1775132091867,"version":"3.50.1"},"reference-count":42,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2026,2,3]],"date-time":"2026-02-03T00:00:00Z","timestamp":1770076800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,2,3]],"date-time":"2026-02-03T00:00:00Z","timestamp":1770076800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Research and Practice on Teaching Evaluation Mechanisms in the Intelligence Era","award":["202411417002"],"award-info":[{"award-number":["202411417002"]}]},{"name":"the National Language Commission Key Project","award":["ZDI145-110"],"award-info":[{"award-number":["ZDI145-110"]}]},{"name":"the Beijing Municipal Education Working Committee Project","award":["XXSZ2024GZ17"],"award-info":[{"award-number":["XXSZ2024GZ17"]}]},{"name":"the Beijing Scientific Research Innovation Team Project","award":["BPHR20220121"],"award-info":[{"award-number":["BPHR20220121"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2026,4]]},"DOI":"10.1007\/s00530-025-02207-4","type":"journal-article","created":{"date-parts":[[2026,2,3]],"date-time":"2026-02-03T03:43:43Z","timestamp":1770090223000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["CBHA-DETR: multi-kernel attention and deformable fusion network for behavior recognition in classroom monitoring"],"prefix":"10.1007","volume":"32","author":[{"given":"Tianci","family":"Li","sequence":"first","affiliation":[]},{"given":"Jin","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Cheng","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Bingxin","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Ning","family":"An","sequence":"additional","affiliation":[]},{"given":"Jiancheng","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,2,3]]},"reference":[{"issue":"3","key":"2207_CR1","doi-asserted-by":"publisher","first-page":"44","DOI":"10.1007\/s00138-023-01396-0","volume":"34","author":"J Liu","year":"2023","unstructured":"Liu, J., Mu, X., Liu, Z., Li, H.: Human skeleton behavior recognition model based on multi-object pose estimation with spatiotemporal semantics. Machine vision and applications 34(3), 44 (2023)","journal-title":"Machine vision and applications"},{"issue":"21","key":"2207_CR2","doi-asserted-by":"publisher","first-page":"25310","DOI":"10.1007\/s10489-023-04858-0","volume":"53","author":"Z Chen","year":"2023","unstructured":"Chen, Z., Liang, M., Xue, Z., Yu, W.: Stran: Student expression recognition based on spatio-temporal residual attention network in classroom teaching videos. Applied Intelligence 53(21), 25310\u201325329 (2023)","journal-title":"Applied Intelligence"},{"issue":"1","key":"2207_CR3","doi-asserted-by":"publisher","first-page":"6336773","DOI":"10.1155\/2021\/6336773","volume":"2021","author":"G Li","year":"2021","unstructured":"Li, G., Liu, F., Wang, Y., Guo, Y., Xiao, L., Zhu, L.: A convolutional neural network (cnn) based approach for the recognition and evaluation of classroom teaching behavior. Scientific Programming 2021(1), 6336773 (2021) https:\/\/doi.org\/10.1155\/2021\/6336773","journal-title":"Scientific Programming"},{"key":"2207_CR4","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TIM.2023.3296124","volume":"72","author":"J Zhao","year":"2023","unstructured":"Zhao, J., Zhu, H.: Cbph-net: a small object detector for behavior recognition in classroom scenarios. IEEE Transactions on Instrumentation and Measurement 72, 1\u201312 (2023) https:\/\/doi.org\/10.1109\/TIM.2023.3296124","journal-title":"IEEE Transactions on Instrumentation and Measurement"},{"key":"2207_CR5","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TIM.2025.3550238","volume":"74","author":"W Lu","year":"2025","unstructured":"Lu, W., Liu, X., Peng, Y., Kyrarini, M., An, K., Cheng, Y.: Pacr-detr: A real-time end-to-end object detector for behavior recognition in various classroom scenarios. IEEE Transactions on Instrumentation and Measurement 74, 1\u201320 (2025) https:\/\/doi.org\/10.1109\/TIM.2025.3550238","journal-title":"IEEE Transactions on Instrumentation and Measurement"},{"key":"2207_CR6","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Lv, W., Xu, S., Wei, J., Wang, G., Dang, Q., Liu, Y., Chen, J.: Detrs beat yolos on real-time object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16965\u201316974 (2024)","DOI":"10.1109\/CVPR52733.2024.01605"},{"issue":"4","key":"2207_CR7","doi-asserted-by":"publisher","first-page":"974","DOI":"10.1109\/TAFFC.2019.2908837","volume":"12","author":"N Bosch","year":"2019","unstructured":"Bosch, N., D\u2019mello, S.K.: Automatic detection of mind wandering from video in the lab and in the classroom. IEEE Transactions on Affective Computing 12(4), 974\u2013988 (2019) https:\/\/doi.org\/10.1109\/TAFFC.2019.2908837","journal-title":"IEEE Transactions on Affective Computing"},{"key":"2207_CR8","doi-asserted-by":"crossref","unstructured":"Dang, M., Liu, G., Li, X., Wan, B., Zhang, Y., Pan, R.: Object detector based on center keypoints for behavior recognition in classroom scenes. IEEE Transactions on Computational Social Systems 12(6), 4744\u20134756 (2025)","DOI":"10.1109\/TCSS.2025.3552067"},{"key":"2207_CR9","doi-asserted-by":"crossref","unstructured":"Yue, G., Jiao, G., Xiang, J.: Semi-supervised iterative learning network for camouflaged object detection. In: ICASSP 2025-2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 1\u20135 (2025)","DOI":"10.1109\/ICASSP49660.2025.10890224"},{"key":"2207_CR10","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2025.130433","volume":"645","author":"F Wang","year":"2025","unstructured":"Wang, F., Jiao, G., Yue, G.: More observation leads to more clarity: multi-view collaboration network for camouflaged object detection. Neurocomputing 645, 130433 (2025) https:\/\/doi.org\/10.1016\/j.neucom.2025.130433","journal-title":"Neurocomputing"},{"key":"2207_CR11","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2024.125392","volume":"260","author":"Z Wang","year":"2025","unstructured":"Wang, Z., Li, L., Zeng, C., Dong, S., Sun, J.: Slbdetection-net: towards closed-set and open-set student learning behavior detection in smart classroom of k-12 education. Expert Systems with Applications 260, 125392 (2025) https:\/\/doi.org\/10.1016\/j.eswa.2024.125392","journal-title":"Expert Systems with Applications"},{"issue":"8","key":"2207_CR12","doi-asserted-by":"publisher","DOI":"10.1016\/j.jksuci.2023.101670","volume":"35","author":"J Zhao","year":"2023","unstructured":"Zhao, J., Zhu, H., Niu, L.: Bitnet: a lightweight object detection network for real-time classroom behavior recognition with transformer and bi-directional pyramid network. Journal of King Saud University-Computer and Information Sciences 35(8), 101670 (2023) https:\/\/doi.org\/10.1016\/j.jksuci.2023.101670","journal-title":"Journal of King Saud University-Computer and Information Sciences"},{"issue":"6","key":"2207_CR13","doi-asserted-by":"publisher","first-page":"3957","DOI":"10.1007\/s00371-024-03640-8","volume":"41","author":"G Yue","year":"2025","unstructured":"Yue, G., Jiao, G., Li, C., Xiang, J.: When cnn meet with vit: decision-level feature fusion for camouflaged object detection. The Visual Computer 41(6), 3957\u20133972 (2025)","journal-title":"The Visual Computer"},{"key":"2207_CR14","doi-asserted-by":"crossref","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: European Conference on Computer Vision, pp. 213\u2013229 (2020). Springer","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"2207_CR15","doi-asserted-by":"crossref","unstructured":"Li, T., Yu, C., Li, Y.: Ddr-detr: Real-time face detection algorithm for classroom scenarios. In: Proceedings of the 2024 International Conference on Artificial Intelligence of Things and Computing, pp. 192\u2013197 (2024)","DOI":"10.1145\/3708282.3708317"},{"key":"2207_CR16","doi-asserted-by":"crossref","unstructured":"Ma, X., Dai, X., Bai, Y., Wang, Y., Fu, Y.: Rewrite the stars. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5694\u20135703 (2024)","DOI":"10.1109\/CVPR52733.2024.00544"},{"key":"2207_CR17","doi-asserted-by":"crossref","unstructured":"Wen, P., Sun, C., Zhang, S., Luo, Y., Huang, H., Zhang, J.: Eopsa-face: An encoder-only detr for classroom face detection. In: 2022 10th International Conference on Information Systems and Computing Technology (ISCTech), pp. 341\u2013348 (2022). IEEE","DOI":"10.1109\/ISCTech58360.2022.00060"},{"key":"2207_CR18","doi-asserted-by":"crossref","unstructured":"Cai, X., Lai, Q., Wang, Y., Wang, W., Sun, Z., Yao, Y.: Poly kernel inception network for remote sensing detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 27706\u201327716 (2024)","DOI":"10.1109\/CVPR52733.2024.02617"},{"key":"2207_CR19","unstructured":"Dai, W., Liu, R., Wu, Z., Wu, T., Wang, M., Zhou, J., Yuan, Y., Liu, J.: Exploiting scale-variant attention for segmenting small medical objects. arXiv preprint arXiv:2407.07720 (2024)"},{"key":"2207_CR20","doi-asserted-by":"crossref","unstructured":"Wang, J., Chen, K., Xu, R., Liu, Z., Loy, C.C., Lin, D.: Carafe: Content-aware reassembly of features. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3007\u20133016 (2019)","DOI":"10.1109\/ICCV.2019.00310"},{"key":"2207_CR21","doi-asserted-by":"crossref","unstructured":"Dai, J., Qi, H., Xiong, Y., Li, Y., Zhang, G., Hu, H., Wei, Y.: Deformable convolutional networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 764\u2013773 (2017)","DOI":"10.1109\/ICCV.2017.89"},{"key":"2207_CR22","doi-asserted-by":"crossref","unstructured":"Zhu, X., Hu, H., Lin, S., Dai, J.: Deformable convnets v2: More deformable, better results. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9308\u20139316 (2019)","DOI":"10.1109\/CVPR.2019.00953"},{"key":"2207_CR23","doi-asserted-by":"crossref","unstructured":"Wang, W., Dai, J., Chen, Z., Huang, Z., Li, Z., Zhu, X., Hu, X., Lu, T., Lu, L., Li, H., et al.: Internimage: Exploring large-scale vision foundation models with deformable convolutions. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14408\u201314419 (2023)","DOI":"10.1109\/CVPR52729.2023.01385"},{"key":"2207_CR24","doi-asserted-by":"crossref","unstructured":"Xiong, Y., Li, Z., Chen, Y., Wang, F., Zhu, X., Luo, J., Wang, W., Lu, T., Li, H., Qiao, Y., et al.: Efficient deformable convnets: Rethinking dynamic and sparse operator for vision applications. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5652\u20135661 (2024)","DOI":"10.1109\/CVPR52733.2024.00540"},{"key":"2207_CR25","doi-asserted-by":"crossref","unstructured":"Rezatofighi, H., Tsoi, N., Gwak, J., Sadeghian, A., Reid, I., Savarese, S.: Generalized intersection over union: A metric and a loss for bounding box regression. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 658\u2013666 (2019)","DOI":"10.1109\/CVPR.2019.00075"},{"key":"2207_CR26","doi-asserted-by":"crossref","unstructured":"Zheng, Z., Wang, P., Liu, W., Li, J., Ye, R., Ren, D.: Distance-iou loss: Faster and better learning for bounding box regression. In: Proceedings of the AAAI Conference on Artificial Intelligence, 34, 12993\u201313000 (2020)","DOI":"10.1609\/aaai.v34i07.6999"},{"key":"2207_CR27","unstructured":"Gevorgyan, Z.: Siou loss: More powerful learning for bounding box regression. arXiv preprint arXiv:2205.12740 (2022)"},{"key":"2207_CR28","doi-asserted-by":"crossref","unstructured":"Yu, J., Jiang, Y., Wang, Z., Cao, Z., Huang, T.: Unitbox: An advanced object detection network. In: Proceedings of the 24th ACM International Conference on Multimedia, pp. 516\u2013520 (2016)","DOI":"10.1145\/2964284.2967274"},{"key":"2207_CR29","unstructured":"Zhang, H., Zhang, S.: Shape-iou: More accurate metric considering bounding box shape and scale. arXiv preprint arXiv:2312.17663 (2023)"},{"key":"2207_CR30","unstructured":"Yang, F., Wang, T.: Scb-dataset3: A benchmark for detecting student classroom behavior. arXiv preprint arXiv:2310.02522 (2023)"},{"key":"2207_CR31","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1007\/s11263-014-0733-5","volume":"111","author":"M Everingham","year":"2015","unstructured":"Everingham, M., Eslami, S.A., Van Gool, L., Williams, C.K., Winn, J., Zisserman, A.: The pascal visual object classes challenge: a retrospective. International journal of computer vision 111(1), 98\u2013136 (2015)","journal-title":"International journal of computer vision"},{"key":"2207_CR32","doi-asserted-by":"crossref","unstructured":"Girshick, R.: Fast r-cnn. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1440\u20131448 (2015)","DOI":"10.1109\/ICCV.2015.169"},{"key":"2207_CR33","doi-asserted-by":"crossref","unstructured":"Liu, W., Anguelov, D., Erhan, D., Szegedy, C., Reed, S., Fu, C.-Y., Berg, A.C.: Ssd: Single shot multibox detector. In: Computer Vision\u2013ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11\u201314, 2016, Proceedings, Part I 14, pp. 21\u201337 (2016). Springer","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"2207_CR34","unstructured":"Khanam, R., Hussain, M.: Yolov11: An overview of the key architectural enhancements. arXiv preprint arXiv:2410.17725 (2024)"},{"key":"2207_CR35","unstructured":"Tian, Y., Ye, Q., Doermann, D.: Yolov12: Attention-centric real-time object detectors. arXiv preprint arXiv:2502.12524 (2025)"},{"key":"2207_CR36","doi-asserted-by":"crossref","unstructured":"Wang, Z., Li, C., Xu, H., Zhu, X., Li, H.: Mamba yolo: A simple baseline for object detection with state space model. In: Proceedings of the AAAI Conference on Artificial Intelligence, 39(8) , pp. 8205\u20138213 (2025)","DOI":"10.1609\/aaai.v39i8.32885"},{"key":"2207_CR37","doi-asserted-by":"crossref","unstructured":"Zheng, D., Dong, W., Hu, H., Chen, X., Wang, Y.: Less is more: Focus attention for efficient detr. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6674\u20136683 (2023)","DOI":"10.1109\/ICCV51070.2023.00614"},{"key":"2207_CR38","doi-asserted-by":"crossref","unstructured":"Li, F., Zhang, H., Liu, S., Guo, J., Ni, L.M., Zhang, L.: Dn-detr: Accelerate detr training by introducing query denoising. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13619\u201313627 (2022)","DOI":"10.1109\/CVPR52688.2022.01325"},{"key":"2207_CR39","unstructured":"Lv, W., Zhao, Y., Chang, Q., Huang, K., Wang, G., Liu, Y.: Rt-detrv2: Improved baseline with bag-of-freebies for real-time detection transformer. arXiv preprint arXiv:2407.17140 (2024)"},{"key":"2207_CR40","doi-asserted-by":"crossref","unstructured":"Huang, S., Lu, Z., Cun, X., Yu, Y., Zhou, X., Shen, X.: Deim: Detr with improved matching for fast convergence. In: Proceedings of the Computer Vision and Pattern Recognition Conference, pp. 15162\u201315171 (2025)","DOI":"10.1109\/CVPR52734.2025.01412"},{"key":"2207_CR41","unstructured":"Peng, Y., Li, H., Wu, P., Zhang, Y., Sun, X., Wu, F.: D-fine: Redefine regression task in detrs as fine-grained distribution refinement. arXiv preprint arXiv:2410.13842 (2024)"},{"key":"2207_CR42","unstructured":"Wang, J., Xu, C., Yang, W., Yu, L.: A normalized gaussian wasserstein distance for tiny object detection. arXiv preprint arXiv:2110.13389 (2021)"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-02207-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-025-02207-4","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-02207-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T11:38:07Z","timestamp":1775129887000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-025-02207-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2,3]]},"references-count":42,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2026,4]]}},"alternative-id":["2207"],"URL":"https:\/\/doi.org\/10.1007\/s00530-025-02207-4","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,2,3]]},"assertion":[{"value":"22 June 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 December 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 February 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"112"}}