{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T15:33:23Z","timestamp":1773156803490,"version":"3.50.1"},"reference-count":60,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2025,6,5]],"date-time":"2025-06-05T00:00:00Z","timestamp":1749081600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,6,5]],"date-time":"2025-06-05T00:00:00Z","timestamp":1749081600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Machine Vision and Applications"],"published-print":{"date-parts":[[2025,7]]},"DOI":"10.1007\/s00138-025-01711-x","type":"journal-article","created":{"date-parts":[[2025,6,5]],"date-time":"2025-06-05T00:00:58Z","timestamp":1749081658000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Block-recurrent visual transformer for enhanced human detection in thermal imaging"],"prefix":"10.1007","volume":"36","author":[{"given":"Pham Cung Le Thien","family":"Vu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Pham The","family":"Bao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0443-612X","authenticated-orcid":false,"given":"Tan Dat","family":"Trinh","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,6,5]]},"reference":[{"key":"1711_CR1","doi-asserted-by":"publisher","first-page":"103116","DOI":"10.1016\/j.jvcir.2021.103116","volume":"77","author":"O Elharrouss","year":"2021","unstructured":"Elharrouss, O., Almaadeed, N., Al-Maadeed, S.: A review of video surveillance systems. J. Vis. Commun. Image Represent. 77, 103116 (2021). https:\/\/doi.org\/10.1016\/j.jvcir.2021.103116","journal-title":"J. Vis. Commun. Image Represent."},{"key":"1711_CR2","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1016\/j.neucom.2018.01.092","volume":"300","author":"A Brunetti","year":"2018","unstructured":"Brunetti, A., Buongiorno, D., Trotta, G.F., Bevilacqua, V.: Computer vision and deep learning techniques for pedestrian detection and tracking: A survey. Neurocomputing. 300, 17\u201333 (2018). https:\/\/doi.org\/10.1016\/j.neucom.2018.01.092","journal-title":"Neurocomputing"},{"key":"1711_CR3","doi-asserted-by":"publisher","first-page":"125459","DOI":"10.1109\/ACCESS.2020.3007481","volume":"8","author":"M Kri\u0161to","year":"2020","unstructured":"Kri\u0161to, M., Ivasic-Kos, M., Pobar, M.: Thermal object detection in difficult weather conditions using YOLO. IEEE Access. 8, 125459\u2013125476 (2020). https:\/\/doi.org\/10.1109\/ACCESS.2020.3007481","journal-title":"IEEE Access."},{"issue":"15","key":"1711_CR4","doi-asserted-by":"publisher","first-page":"5835","DOI":"10.3390\/s22155835","volume":"22","author":"H Tan","year":"2022","unstructured":"Tan, H., Ou, D., Zhang, L., Shen, G., Li, X., Ji, Y.: Infrared Sensation-Based salient targets enhancement methods in Low-Visibility scenes. Sensors. 22(15), 5835 (2022). https:\/\/doi.org\/10.3390\/s22155835","journal-title":"Sensors"},{"issue":"9","key":"1711_CR5","doi-asserted-by":"publisher","first-page":"85","DOI":"10.14801\/jkiit.2018.16.9.85","volume":"16","author":"T Tan Dat","year":"2018","unstructured":"Tan Dat, T., Young, J.: Multi-scale pedestrian detection in thermal imaging using deep convolutional neural network and adaptive NMS. J. Korean Inst. Inform. Technol. 16(9), 85\u201394 (2018). https:\/\/doi.org\/10.14801\/jkiit.2018.16.9.85","journal-title":"J. Korean Inst. Inform. Technol."},{"key":"1711_CR6","doi-asserted-by":"publisher","unstructured":"John, V., Mita, S., Liu, Z., Qi, B.: Pedestrian detection in thermal images using adaptive fuzzy C-means clustering and convolutional neural networks. In: 2015 14th IAPR International Conference on Machine Vision Applications (MVA), 18\u201322 May pp. 246\u2013249, (2015) (2015). https:\/\/doi.org\/10.1109\/MVA.2015.7153177","DOI":"10.1109\/MVA.2015.7153177"},{"issue":"7","key":"1711_CR7","doi-asserted-by":"publisher","first-page":"101","DOI":"10.14801\/jkiit.2017.15.7.101","volume":"15","author":"T Tan Dat","year":"2017","unstructured":"Tan Dat, T., Xinjie, M., Jin Young, K.: Improved running Gaussian average for background Subtraction in thermal imagery. J. Korean Inst. Inform. Technol. 15(7), 101\u2013117 (2017). https:\/\/doi.org\/10.14801\/jkiit.2017.15.7.101","journal-title":"J. Korean Inst. Inform. Technol."},{"key":"1711_CR8","doi-asserted-by":"publisher","unstructured":"Zin, T.T., Takahashi, H., Hama, H.: Robust Person Detection using Far Infrared Camera for Image Fusion. In: Second International Conference on Innovative Computing, Informatio and Control (ICICIC 2007), 5\u20137 Sept. pp. 310\u2013310, (2007) (2007). https:\/\/doi.org\/10.1109\/ICICIC.2007.501","DOI":"10.1109\/ICICIC.2007.501"},{"issue":"11","key":"1711_CR9","doi-asserted-by":"publisher","first-page":"3212","DOI":"10.1109\/TNNLS.2018.2876865","volume":"30","author":"ZQ Zhao","year":"2019","unstructured":"Zhao, Z.Q., Zheng, P., Xu, S.T., Wu, X.: Object detection with deep learning: A review. IEEE Trans. Neural Networks Learn. Syst. 30(11), 3212\u20133232 (2019). https:\/\/doi.org\/10.1109\/TNNLS.2018.2876865","journal-title":"IEEE Trans. Neural Networks Learn. Syst."},{"key":"1711_CR10","doi-asserted-by":"publisher","first-page":"105106","DOI":"10.1016\/j.imavis.2024.105106","volume":"148","author":"B Ghari","year":"2024","unstructured":"Ghari, B., Tourani, A., Shahbahrami, A., Gaydadjiev, G.: Pedestrian detection in low-light conditions: A comprehensive survey. Image Vis. Comput. 148, 105106 (2024). https:\/\/doi.org\/10.1016\/j.imavis.2024.105106","journal-title":"Image Vis. Comput."},{"issue":"1","key":"1711_CR11","doi-asserted-by":"publisher","first-page":"142","DOI":"10.1109\/TPAMI.2015.2437384","volume":"38","author":"R Girshick","year":"2016","unstructured":"Girshick, R., Donahue, J., Darrell, T., Malik, J.: Region-Based convolutional networks for accurate object detection and segmentation. IEEE Trans. Pattern Anal. Mach. Intell. 38(1), 142\u2013158 (2016). https:\/\/doi.org\/10.1109\/TPAMI.2015.2437384","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1711_CR12","doi-asserted-by":"publisher","unstructured":"Girshick, R.: Fast R-CNN. In: IEEE International Conference on Computer Vision (ICCV), 7\u201313 Dec. 2015, pp. 1440\u20131448, (2015) (2015). https:\/\/doi.org\/10.1109\/ICCV.2015.169","DOI":"10.1109\/ICCV.2015.169"},{"issue":"6","key":"1711_CR13","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","volume":"39","author":"S Ren","year":"2017","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: Towards Real-Time object detection with region proposal networks. IEEE Trans. Pattern Anal. Mach. Intell. 39(6), 1137\u20131149 (2017). https:\/\/doi.org\/10.1109\/TPAMI.2016.2577031","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1711_CR14","doi-asserted-by":"publisher","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R.: Mask R-CNN. In: 2017 IEEE International Conference on Computer Vision (ICCV), 22\u201329 Oct. pp. 2980\u20132988, (2017). https:\/\/doi.org\/10.1109\/ICCV.2017.322","DOI":"10.1109\/ICCV.2017.322"},{"key":"1711_CR15","unstructured":"Dai, J., Li, Y., He, K., Sun, J.: R-FCN: object detection via region-based fully convolutional networks. presented at the Proceedings of the 30th International Conference on Neural Information Processing Systems, Barcelona, Spain, (2016)"},{"key":"1711_CR16","doi-asserted-by":"publisher","unstructured":"Redmon, J., Divvala, S., Girshick, R., Farhadi, A.: You Only Look Once: Unified, Real-Time Object Detection. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 27\u201330 June pp. 779\u2013788, (2016). https:\/\/doi.org\/10.1109\/CVPR.2016.91","DOI":"10.1109\/CVPR.2016.91"},{"key":"1711_CR17","doi-asserted-by":"crossref","unstructured":"Liu, W., Anguelov, D., Erhan, D., Szegedy, C., Reed, S., Fu, C.-Y., Berg, A.C.: SSD: Single shot multibox detector. In: Cham, B., Leibe, J., Matas, N., Sebe, Welling, M. (eds.) Computer Vision\u2013 ECCV 2016, pp. 21\u201337. Springer International Publishing, (2016)","DOI":"10.1007\/978-3-319-46448-0_2"},{"issue":"2","key":"1711_CR18","doi-asserted-by":"publisher","first-page":"318","DOI":"10.1109\/TPAMI.2018.2858826","volume":"42","author":"TY Lin","year":"2020","unstructured":"Lin, T.Y., Goyal, P., Girshick, R., He, K., Doll\u00e1r, P.: Focal loss for dense object detection. IEEE Trans. Pattern Anal. Mach. Intell. 42(2), 318\u2013327 (2020). https:\/\/doi.org\/10.1109\/TPAMI.2018.2858826","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1711_CR19","doi-asserted-by":"publisher","unstructured":"Tian, Z., Shen, C., Chen, H., He, T.: FCOS: Fully Convolutional One-Stage Object Detection. In: 2019 IEEE\/CVF International Conference on Computer Vision (ICCV), 27 Oct.-2 Nov. pp. 9626\u20139635, (2019). https:\/\/doi.org\/10.1109\/ICCV.2019.00972","DOI":"10.1109\/ICCV.2019.00972"},{"key":"1711_CR20","doi-asserted-by":"publisher","unstructured":"Tan, M., Pang, R., Le, Q.V.: EfficientDet: Scalable and Efficient Object Detection. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 13\u201319 June 2020, pp. 10778\u201310787, (2020). https:\/\/doi.org\/10.1109\/CVPR42600.2020.01079","DOI":"10.1109\/CVPR42600.2020.01079"},{"key":"1711_CR21","doi-asserted-by":"publisher","unstructured":"Feng, C., Zhong, Y., Gao, Y., Scott, M.R., Huang, W.: TOOD: Task-aligned One-stage Object Detection. In: 2021 IEEE\/CVF International Conference on Computer Vision (ICCV), 10\u201317 Oct. pp. 3490\u20133499, (2021). https:\/\/doi.org\/10.1109\/ICCV48922.2021.00349","DOI":"10.1109\/ICCV48922.2021.00349"},{"key":"1711_CR22","doi-asserted-by":"publisher","unstructured":"Terven, J., C\u00f3rdova-Esparza, D.-M., Romero-Gonz\u00e1lez, J.-A.: A Comprehensive Review of YOLO Architectures in Computer Vision: From YOLOv1 to YOLOv8 and YOLO-NAS. Machine Learning and Knowledge Extraction. 5 (4), 1680\u20131716 (2023). https:\/\/doi.org\/10.3390\/make5040083","DOI":"10.3390\/make5040083"},{"key":"1711_CR23","doi-asserted-by":"crossref","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-End object detection with Transformers. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) Computer Vision\u2013 ECCV 2020, Cham, pp. 213\u2013229. Springer International Publishing, (2020)","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"1711_CR24","doi-asserted-by":"publisher","first-page":"107021","DOI":"10.1016\/j.engappai.2023.107021","volume":"126","author":"Y Li","year":"2023","unstructured":"Li, Y., Miao, N., Ma, L., Shuang, F., Huang, X.: Transformer for object detection: Review and benchmark. Eng. Appl. Artif. Intell. 126, 107021 (2023). https:\/\/doi.org\/10.1016\/j.engappai.2023.107021","journal-title":"Eng. Appl. Artif. Intell."},{"key":"1711_CR25","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., Uszkoreit, J., Houlsby, N., AN IMAGE IS WORTH 16X16 WORDS: TRANSFORMERS FOR IMAGE RECOGNITION AT SCALE:. In: ICLR 2021\u20139th International Conference on Learning Representations, [Online]. (2021). Available: https:\/\/www.scopus.com\/inward\/record.uri?eid=2-s2.0-85150208907&partnerID=40&md5=e9616b2256a7db273c2ed80b6f498721"},{"key":"1711_CR26","doi-asserted-by":"publisher","unstructured":"Wu, B., Xu, C., Dai, X., Wan, A., Zhang, P., Yan, Z., Tomizuka, M., Gonzalez, J., Keutzer, K., Vajda, P.: Visual Transformers: Token-based Image Representation and Processing for Computer Vision. arXiv:2006.03677 (2020). https:\/\/doi.org\/10.48550\/arXiv.2006.03677","DOI":"10.48550\/arXiv.2006.03677"},{"key":"1711_CR27","doi-asserted-by":"publisher","unstructured":"Mayer, Z., Kahn, J., Hou, Y., G\u00f6tz, M., Volk, R., Schultmann, F.: Deep learning approaches to Building rooftop thermal Bridge detection from aerial images. Autom. Constr. 146 104690 (2023). https:\/\/doi.org\/10.1016\/j.autcon.2022.104690","DOI":"10.1016\/j.autcon.2022.104690"},{"key":"1711_CR28","doi-asserted-by":"publisher","unstructured":"Meng, H., Si, S., Mao, B., Zhao, J., Wu, L.: LAGSwin: Local attention guided Swin-transformer for thermal infrared sports object detection. PLOS ONE. 19(4) (2024). https:\/\/doi.org\/10.1371\/journal.pone.0297068 e0297068","DOI":"10.1371\/journal.pone.0297068"},{"key":"1711_CR29","doi-asserted-by":"publisher","unstructured":"Li, Y., Mao, H., Girshick, R., He, K.: Exploring Plain Vision Transformer Backbones for Object Detection. presented at the Computer Vision\u2013 ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings, Part IX, Tel Aviv, Israel, 2022. [Online]. (2022). Available: https:\/\/doi.org\/10.1007\/978-3-031-20077-9_17","DOI":"10.1007\/978-3-031-20077-9_17"},{"key":"1711_CR30","doi-asserted-by":"publisher","unstructured":"Zhu, M., Liu, M.: Mobile Video Object Detection with Temporally-Aware Feature Maps. In: 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 18\u201323 June 2018, pp. 5686\u20135695, (2018). https:\/\/doi.org\/10.1109\/CVPR.2018.00596","DOI":"10.1109\/CVPR.2018.00596"},{"key":"1711_CR31","doi-asserted-by":"publisher","unstructured":"Soga, M., Hiratsuka, S., Fukamachi, H., Ninomiya, Y.: Pedestrian Detection for a Near Infrared Imaging System. In: 2008 11th International IEEE Conference on Intelligent Transportation Systems, 12\u201315 Oct. pp. 1167\u20131172, (2008). https:\/\/doi.org\/10.1109\/ITSC.2008.4732710","DOI":"10.1109\/ITSC.2008.4732710"},{"key":"1711_CR32","doi-asserted-by":"publisher","unstructured":"Li, Z., Wu, B., Nevatia, R.: Pedestrian Detection in Infrared Images based on Local Shape Features. In: 2007 IEEE Conference on Computer Vision and Pattern Recognition, 17\u201322 June 2007, pp. 1\u20138, (2007). https:\/\/doi.org\/10.1109\/CVPR.2007.383452","DOI":"10.1109\/CVPR.2007.383452"},{"key":"1711_CR33","doi-asserted-by":"publisher","unstructured":"Baek, J., Hong, S., Kim, J., Kim, E.: Efficient pedestrian detection at nighttime using a thermal camera. Sensors. 17(8) (2017). https:\/\/doi.org\/10.3390\/s17081850","DOI":"10.3390\/s17081850"},{"issue":"3","key":"1711_CR34","doi-asserted-by":"publisher","first-page":"6763","DOI":"10.3390\/s150306763","volume":"15","author":"ES Jeon","year":"2015","unstructured":"Jeon, E.S., Choi, J.-S., Lee, J.H., Shin, K.Y., Kim, Y.G., Le, T.T., Park, K.R.: Human detection based on the generation of a background image by using a Far-Infrared light camera. Sensors. 15(3), 6763\u20136788 (2015). https:\/\/doi.org\/10.3390\/s150306763","journal-title":"Sensors"},{"issue":"1","key":"1711_CR35","doi-asserted-by":"publisher","first-page":"107","DOI":"10.14801\/jkiit.2019.17.1.107","volume":"17","author":"S Nazeer","year":"2019","unstructured":"Nazeer, S., Gwang-Hyun, Y., Dat, T., Do-Seong, T., Jin-Young, S.: Real-Time implementation of human detection in thermal imagery based on CNN. J. Korean Inst. Inform. Technol. 17(1), 107\u2013121 (2019). https:\/\/doi.org\/10.14801\/jkiit.2019.17.1.107","journal-title":"J. Korean Inst. Inform. Technol."},{"key":"1711_CR36","doi-asserted-by":"publisher","unstructured":"Akshatha, K.R., Karunakar, A.K., Shenoy, S.B., Pai, A.K., Nagaraj, N.H., Rohatgi, S.S.: Human detection in aerial thermal images using faster R-CNN and SSD algorithms. Electronics. 11(7) (2022). https:\/\/doi.org\/10.3390\/electronics11071151","DOI":"10.3390\/electronics11071151"},{"key":"1711_CR37","doi-asserted-by":"publisher","first-page":"103694","DOI":"10.1016\/j.infrared.2021.103694","volume":"115","author":"X Dai","year":"2021","unstructured":"Dai, X., Hu, J., Zhang, H., Shitu, A., Luo, C., Osman, A., Sfarra, S., Duan, Y.: Multi-task faster R-CNN for nighttime pedestrian detection and distance Estimation. Infrared Phys. Technol. 115, 103694 (2021). https:\/\/doi.org\/10.1016\/j.infrared.2021.103694","journal-title":"Infrared Phys. Technol."},{"issue":"4","key":"1711_CR38","doi-asserted-by":"publisher","first-page":"3962","DOI":"10.11591\/ijece.v14i4.pp3962-3970","volume":"14","author":"TD Trinh","year":"2024","unstructured":"Trinh, T.D., Le Thien Vu, C., The Bao, P.: Comparing Mask R-CNN backbone architectures for human detection using thermal imaging. International Journal of Electrical and Computer Engineering (IJECE) 14(4), 3962\u20133970 (2024)","journal-title":"International Journal of Electrical and Computer Engineering (IJECE)"},{"key":"1711_CR39","doi-asserted-by":"publisher","unstructured":"Mantau, A.J., Widayat, I.W., Leu, J.-S., K\u00f6ppen, M.: A Human-Detection method based on YOLOv5 and transfer learning using thermal image data from UAV perspective for surveillance system. Drones. 6(10) (2022). https:\/\/doi.org\/10.3390\/drones6100290","DOI":"10.3390\/drones6100290"},{"key":"1711_CR40","doi-asserted-by":"publisher","unstructured":"Iva\u0161i\u0107-Kos, M., Kri\u0161to, M., Pobar, M.: Human Detection in Thermal Imaging Using YOLO. presented at the Proceedings of the 2019 5th International Conference on Computer and Technology Applications, Istanbul, Turkey, 2019. [Online]. (2019). Available: https:\/\/doi.org\/10.1145\/3323933.3324076","DOI":"10.1145\/3323933.3324076"},{"issue":"2","key":"1711_CR41","doi-asserted-by":"publisher","first-page":"202","DOI":"10.3103\/s0146411621020097","volume":"55","author":"X Hong","year":"2021","unstructured":"Hong, X., Zhu, L.: Detecting small objects in thermal images using Single-Shot detector. Autom. Control Comput. Sci. 55(2), 202\u2013211 (2021). https:\/\/doi.org\/10.3103\/s0146411621020097","journal-title":"Autom. Control Comput. Sci."},{"key":"1711_CR42","doi-asserted-by":"publisher","unstructured":"Johansen, A.S., Nasrollahi, K., Escalera, S., Moeslund, T.B.: Who cares about the weather?? Inferring weather? conditions for weather?-Aware object detection in thermal images. Appl. Sci. 13(18) (2023). https:\/\/doi.org\/10.3390\/app131810295","DOI":"10.3390\/app131810295"},{"key":"1711_CR43","doi-asserted-by":"publisher","first-page":"35352","DOI":"10.1109\/ACCESS.2023.3264714","volume":"11","author":"J Smith","year":"2023","unstructured":"Smith, J., Loncomilla, P., Ruiz-Del-Solar, J.: Human pose Estimation using thermal images. IEEE Access. 11, 35352\u201335370 (2023). https:\/\/doi.org\/10.1109\/ACCESS.2023.3264714","journal-title":"IEEE Access."},{"key":"1711_CR44","doi-asserted-by":"publisher","unstructured":"Shin, Y., Kim, S.: Infrared Pedestrian Dataset Training using Swin Transformer model. In: 2022 22nd International Conference on Control, Automation and Systems (ICCAS), 27 Nov.-1 Dec. pp. 1301\u20131304, (2022). https:\/\/doi.org\/10.23919\/ICCAS55662.2022.10003891","DOI":"10.23919\/ICCAS55662.2022.10003891"},{"key":"1711_CR45","doi-asserted-by":"publisher","unstructured":"Chen, Y., Ye, J., Wan, X.: TF-YOLO: A Transformer\u2013Fusion-Based YOLO detector for multimodal pedestrian detection in autonomous driving scenes. World Electr. Veh. J. 14(12) (2023). https:\/\/doi.org\/10.3390\/wevj14120352","DOI":"10.3390\/wevj14120352"},{"key":"1711_CR46","doi-asserted-by":"publisher","first-page":"107917","DOI":"10.1016\/j.compbiomed.2024.107917","volume":"170","author":"Y Chen","year":"2024","unstructured":"Chen, Y., Zhang, C., Chen, B., Huang, Y., Sun, Y., Wang, C., Fu, X., Dai, Y., Qin, F., Peng, Y., Gao, Y.: Accurate leukocyte detection based on deformable-DETR and multi-level feature fusion for aiding diagnosis of blood diseases. Comput. Biol. Med. 170, 107917 (2024). https:\/\/doi.org\/10.1016\/j.compbiomed.2024.107917","journal-title":"Comput. Biol. Med."},{"key":"1711_CR47","doi-asserted-by":"publisher","unstructured":"Dat, T.T., Le Thien Vu, P.C., Truong, N.N., Anh Dang, L.T., Sang, T., Bao, V.N.: P.T.: Leaf Recognition Based on Joint Learning Multiloss of Multimodel Convolutional Neural Networks: A Testing for Vietnamese Herb. Computational Intelligence and Neuroscience. 5032359 (2021) (2021). (1) https:\/\/doi.org\/10.1155\/2021\/5032359","DOI":"10.1155\/2021\/5032359"},{"key":"1711_CR48","doi-asserted-by":"publisher","unstructured":"Zhao, Z., Bai, H., Zhang, J., Zhang, Y., Xu, S., Lin, Z., Timofte, R., Gool, L.V.: CDDFuse: Correlation-Driven Dual-Branch Feature Decomposition for Multi-Modality Image Fusion. In: 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 17\u201324 June 2023, pp. 5906\u20135916, (2023). https:\/\/doi.org\/10.1109\/CVPR52729.2023.00572","DOI":"10.1109\/CVPR52729.2023.00572"},{"key":"1711_CR49","doi-asserted-by":"publisher","unstructured":"Tian, D., Yan, X., Zhou, D., Wang, C., Zhang, W.: IV-YOLO: A lightweight Dual-Branch object detection network. Sensors. 24(19) (2024). https:\/\/doi.org\/10.3390\/s24196181","DOI":"10.3390\/s24196181"},{"key":"1711_CR50","doi-asserted-by":"publisher","unstructured":"Wang, J., Su, N., Zhao, C., Yan, Y., Feng, S.: Multi-Modal object detection method based on Dual-Branch asymmetric attention backbone and feature fusion pyramid network. Remote Sens. 16(20) (2024). https:\/\/doi.org\/10.3390\/rs16203904","DOI":"10.3390\/rs16203904"},{"key":"1711_CR51","doi-asserted-by":"publisher","unstructured":"Eltahan, M., Elsayed, K.: Multi-Domain Thermal Object Detection Using Generative Adversarial Networks. In: 2022 International Conference on Digital Image Computing: Techniques and Applications (DICTA), 30 Nov.-2 Dec. pp. 1\u20135, (2022) (2022). https:\/\/doi.org\/10.1109\/DICTA56598.2022.10034641","DOI":"10.1109\/DICTA56598.2022.10034641"},{"issue":"2","key":"1711_CR52","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1007\/s13748-019-00203-0","volume":"9","author":"A Dhillon","year":"2020","unstructured":"Dhillon, A., Verma, G.K.: Convolutional neural network: A review of models, methodologies and applications to object detection. Progress Artif. Intell. 9(2), 85\u2013112 (2020). https:\/\/doi.org\/10.1007\/s13748-019-00203-0","journal-title":"Progress Artif. Intell."},{"key":"1711_CR53","doi-asserted-by":"publisher","unstructured":"Sandler, M., Howard, A., Zhu, M., Zhmoginov, A., Chen, L.C.: MobileNetV2: Inverted Residuals and Linear Bottlenecks. In: 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 18\u201323 June 2018, pp. 4510\u20134520, (2018). https:\/\/doi.org\/10.1109\/CVPR.2018.00474","DOI":"10.1109\/CVPR.2018.00474"},{"key":"1711_CR54","doi-asserted-by":"publisher","unstructured":"Lin, T., Dollar, P., Girshick, R., He, K., Hariharan, B., Belongie, S.: Feature Pyramid Networks for Object Detection. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 21\u201326 July 2017, pp. 936\u2013944, (2017). https:\/\/doi.org\/10.1109\/CVPR.2017.106","DOI":"10.1109\/CVPR.2017.106"},{"key":"1711_CR55","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. presented at the Proceedings of the 31st International Conference on Neural Information Processing Systems, Long Beach, California, USA, (2017). (2017)"},{"key":"1711_CR56","unstructured":"Hutchins, D., Schlag, I., Wu, Y., Dyer, E., Neyshabur, B.: Block-recurrent transformers. presented at the Proceedings of the 36th International Conference on Neural Information Processing Systems, New Orleans, LA, USA, (2024). (2024)"},{"key":"1711_CR57","unstructured":"Chen, K., Wang, J., Pang, J., Cao, Y., Xiong, Y., Li, X., Sun, S., Feng, W., Liu, Z., Xu, J.: MMDetection: Open mmlab detection toolbox and benchmark. arXiv preprint arXiv:1906.07155. (2019)"},{"key":"1711_CR58","unstructured":"Chen, X., Liang, C., Huang, D., Real, E., Wang, K., Pham, H., Dong, X., Luong, T., Hsieh, C.-J., Lu, Y., Le, Q.V.: Symbolic discovery of optimization algorithms. presented at the Proceedings of the 37th International Conference on Neural Information Processing Systems, New Orleans, LA, USA, (2024). (2024)"},{"issue":"3","key":"1711_CR59","doi-asserted-by":"publisher","first-page":"257","DOI":"10.1109\/JPROC.2023.3238524","volume":"111","author":"Z Zou","year":"2023","unstructured":"Zou, Z., Chen, K., Shi, Z., Guo, Y., Ye, J.: Object detection in 20 years: A survey. Proc. IEEE. 111(3), 257\u2013276 (2023). https:\/\/doi.org\/10.1109\/JPROC.2023.3238524","journal-title":"Proc. IEEE"},{"key":"1711_CR60","unstructured":"Liu, M., Zhu, M., White, M., Li, Y., Kalenichenko, D.: Looking fast and slow: Memory-guided mobile video object detection. arXiv preprint arXiv:1903.10172. (2019)"}],"container-title":["Machine Vision and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-025-01711-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00138-025-01711-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-025-01711-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,6]],"date-time":"2025-09-06T17:51:06Z","timestamp":1757181066000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00138-025-01711-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,5]]},"references-count":60,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2025,7]]}},"alternative-id":["1711"],"URL":"https:\/\/doi.org\/10.1007\/s00138-025-01711-x","relation":{},"ISSN":["0932-8092","1432-1769"],"issn-type":[{"value":"0932-8092","type":"print"},{"value":"1432-1769","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,6,5]]},"assertion":[{"value":"17 January 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 May 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 May 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 June 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"88"}}