{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,8]],"date-time":"2026-05-08T15:08:44Z","timestamp":1778252924825,"version":"3.51.4"},"reference-count":42,"publisher":"Springer Science and Business Media LLC","issue":"15","license":[{"start":{"date-parts":[[2025,10,21]],"date-time":"2025-10-21T00:00:00Z","timestamp":1761004800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,21]],"date-time":"2025-10-21T00:00:00Z","timestamp":1761004800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["52302508"],"award-info":[{"award-number":["52302508"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SIViP"],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s11760-025-04871-5","type":"journal-article","created":{"date-parts":[[2025,10,21]],"date-time":"2025-10-21T01:35:04Z","timestamp":1761010504000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["SHM-YOLO: Detection of occluded small objects in top-down views from tower cranes under adverse rain and snow conditions"],"prefix":"10.1007","volume":"19","author":[{"given":"Yudong","family":"Pang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhixing","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Cong","family":"Du","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yanxue","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhenkun","family":"Guo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,10,21]]},"reference":[{"key":"4871_CR1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jobe.2022.104007","volume":"49","author":"M Xu","year":"2022","unstructured":"Xu, M., Nie, X., Li, H., Cheng, J.C.: Smart construction sites: a promising approach to improving on-site HSE management performance. J. Build. Eng. 49, 104007 (2022). https:\/\/doi.org\/10.1016\/j.jobe.2022.104007","journal-title":"J. Build. Eng."},{"key":"4871_CR2","doi-asserted-by":"publisher","DOI":"10.1016\/j.autcon.2023.105103","volume":"156","author":"S Kim","year":"2023","unstructured":"Kim, S., Hong, S.H., Kim, H., Lee, M.: Small object detection (SOD) system for comprehensive construction site safety monitoring. Autom. Constr. 156, 105103 (2023). https:\/\/doi.org\/10.1016\/j.autcon.2023.105103","journal-title":"Autom. Constr."},{"key":"4871_CR3","unstructured":"Jocher, G., Stoken, A., Borovec, J., Changyu, L., Hogan, A., Diaconu, L., Poznanski, J., Yu, L., Rai, P., Ferriday, R., Sullivan, T.: \"YOLOv5\", (2020), https:\/\/github.com\/ultralytics\/yolov5."},{"key":"4871_CR4","doi-asserted-by":"publisher","DOI":"10.1016\/j.autcon.2023.105244","volume":"158","author":"HS Kim","year":"2024","unstructured":"Kim, H.S., Seong, J., Jung, H.J.: Optimal domain adaptive object detection with self-training and adversarial-based approach for construction site monitoring. Autom. Constr. 158, 105244 (2024). https:\/\/doi.org\/10.1016\/j.autcon.2023.105244","journal-title":"Autom. Constr."},{"issue":"11","key":"4871_CR5","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1145\/3422622","volume":"63","author":"I Goodfellow","year":"2020","unstructured":"Goodfellow, I., Pouget-Abadie, J., Mirza, M., Xu, B., Warde-Farley, D., Ozair, S., Courville, A., Bengio, Y.: Generative adversarial networks. Commun. ACM 63(11), 139\u2013144 (2020). https:\/\/doi.org\/10.1145\/3422622","journal-title":"Commun. ACM"},{"issue":"11","key":"4871_CR6","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y LeCun","year":"1998","unstructured":"LeCun, Y., Bottou, L., Bengio, Y., Haffner, P.: Gradient-based learning applied to document recognition. Proc. IEEE 86(11), 2278\u20132324 (1998)","journal-title":"Proc. IEEE"},{"issue":"19","key":"4871_CR7","doi-asserted-by":"publisher","DOI":"10.3390\/app131910700","volume":"13","author":"X Li","year":"2023","unstructured":"Li, X., Hao, T., Li, F., Zhao, L., Wang, Z.: Faster r-cnn-lstm construction site unsafe behavior recognition model. Appl. Sci. 13(19), 10700 (2023). https:\/\/doi.org\/10.3390\/app131910700","journal-title":"Appl. Sci."},{"key":"4871_CR8","doi-asserted-by":"crossref","unstructured":"Redmon, J., Divvala, S., Girshick, R., Farhadi, A.: You only look once: unified, real-time object detection, In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 779\u2013788 (2016).","DOI":"10.1109\/CVPR.2016.91"},{"key":"4871_CR9","unstructured":"Redmon, J., Farhadi,A.: YOLOv3: An Incremental Improvement, In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1\u20136 (2018)"},{"key":"4871_CR10","unstructured":"Bochkovskiy, A., Wang, C.-Y. Liao, H.-Y. M.: YOLOv4: Optimal Speed and Accuracy of Object Detection, arXiv:2004.10934 (2020)."},{"key":"4871_CR11","unstructured":"Li, C., Li, L., Jiang, H., Weng, K., Geng, Y., Li, L., Ke, Z.: YOLOv6: A single-stage object detection framework for industrial applications, arXiv:2209.02976 (2022)."},{"key":"4871_CR12","doi-asserted-by":"crossref","unstructured":"Wang, C.-Y., Bochkovskiy, A., Liao, H.-Y. M.: YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors, In: Proceedings of the IEEE\/CVF\u00a0Conference on\u00a0Computer\u00a0Vision and\u00a0Pattern Recognition, pp. 7464\u20137475 (2023).","DOI":"10.1109\/CVPR52729.2023.00721"},{"key":"4871_CR13","unstructured":"Jocher, G., Chaurasia, A., Qiu, J.: Ultralytics YOLOv8, (2023), https:\/\/github.com\/ultralytics\/ultralytics."},{"key":"4871_CR14","unstructured":"Khanam, R., Hussain, M.: Yolov11: An overview of the key architectural enhancements, arXiv:2410.17725 (2024)."},{"key":"4871_CR15","doi-asserted-by":"publisher","unstructured":"Lin, T.-Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., Zitnick, C. L.: Microsoft coco: Common objects in context, In Computer Vision \u2013 ECCV 2014, eds. D. Fleet et al., Lect. Notes Comput. Sci (Springer, Cham, 2014), pp. 740\u2013755, https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48.","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"4871_CR16","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham, M., Van Gool, L., Williams, C.K., Winn, J., Zisserman, A.: The pascal visual object classes (voc) challenge. Int. J. Comput. Vis. 88, 303\u2013338 (2010). https:\/\/doi.org\/10.1007\/s11263-009-0275-4","journal-title":"Int. J. Comput. Vis."},{"key":"4871_CR17","doi-asserted-by":"crossref","unstructured":"Chen, Y., Li, W., Sakaridis, C., Dai, D., Van Gool, L.: Domain adaptive faster r-cnn for object detection in the wild, In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3339\u20133348 (2018).","DOI":"10.1109\/CVPR.2018.00352"},{"issue":"2","key":"4871_CR18","doi-asserted-by":"publisher","first-page":"2384","DOI":"10.1109\/TPAMI.2022.3179528","volume":"45","author":"X Yang","year":"2022","unstructured":"Yang, X., Yan, J., Liao, W., Yang, X., Tang, J., He, T.: Scrdet++: Detecting small, cluttered and rotated objects via instance-level feature denoising and rotation loss smoothing. IEEE Trans. Pattern Anal. Mach. Intell. 45(2), 2384\u20132399 (2022). https:\/\/doi.org\/10.1109\/TPAMI.2022.3179528","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"4871_CR19","doi-asserted-by":"crossref","unstructured":"Girshick, R., Donahue, J., Darrell, T., Malik, J.: Rich feature hierarchies for accurate object detection and semantic segmentation, In: Proceedings of the IEEE conference on computer vision and pattern Recognition, pp. 580\u2013587 (2014).","DOI":"10.1109\/CVPR.2014.81"},{"key":"4871_CR20","doi-asserted-by":"crossref","unstructured":"Salscheider, N. O.: Featurenms: Non-maximum suppression by learning feature embeddings, In: Proceedings of the 25th International Conference\u00a0on\u00a0Pattern Recognition, pp. 7848\u20137854 (2021).","DOI":"10.1109\/ICPR48806.2021.9412930"},{"key":"4871_CR21","doi-asserted-by":"publisher","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers, In: European Conference on Computer Vision., Lecture Notes in Computer Science (Springer, Cham, 2020), pp. 213\u2013229. https:\/\/doi.org\/10.1007\/978-3-030-58452-8_13","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"4871_CR22","unstructured":"Zhu, X., Su, W., Lu, L., Li, B., Wang, X., Dai, J.: Deformable detr: Deformable transformers for end-to-end object detection, arXiv:2010.04159 (2020)."},{"key":"4871_CR23","doi-asserted-by":"publisher","unstructured":"Ding, N., Eskandarian, A.: The impact of different backbone architecture on autonomous vehicle dataset, In: Proceedings of the ASME International Mechanical Engineering Congress\u00a0. Expo, p. V002T02A011 (2023). https:\/\/doi.org\/10.1115\/IMECE2023-114859","DOI":"10.1115\/IMECE2023-114859"},{"key":"4871_CR24","doi-asserted-by":"crossref","unstructured":"Tsai, Y.-H. H., Huang, L.-K., Salakhutdinov, R.: Learning robust visual-semantic embeddings, In: Proceedings of the IEEE International Conference on Computer Vision, pp. 3571\u20133580 (2017).","DOI":"10.1109\/ICCV.2017.386"},{"issue":"1","key":"4871_CR25","doi-asserted-by":"publisher","first-page":"29062","DOI":"10.1038\/s41598-025-14786-3","volume":"15","author":"S Wang","year":"2015","unstructured":"Wang, S.: Development of approach to an automated acquisition of static street view images using transformer architecture for analysis of Building characteristics. Sci. Rep. 15(1), 29062 (2015)","journal-title":"Sci. Rep."},{"key":"4871_CR26","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Doll\u00e1r, P., Girshick, R., He, K., Hariharan, B., Belongie, S.: Feature pyramid networks for object detection, In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2117\u20132125 (2017).","DOI":"10.1109\/CVPR.2017.106"},{"key":"4871_CR27","doi-asserted-by":"crossref","unstructured":"Woo, S., Park, J., Lee, J. Y., Kweon, I. S.: CBAM: Convolutional block attention module, In: The European Conference on Computer Vision, pp. 3\u201319 (2018).","DOI":"10.1007\/978-3-030-01234-2_1"},{"issue":"4","key":"4871_CR28","doi-asserted-by":"publisher","first-page":"1680","DOI":"10.3390\/make5040083","volume":"5","author":"J Terven","year":"2023","unstructured":"Terven, J., C\u00f3rdova-Esparza, D.-M., Romero-Gonz\u00e1lez, J.-A.: A comprehensive review of YOLO architectures in computer vision: from YOLOv1 to YOLOv8 and YOLO-NAS. Mach. Learn. Knowl. Extr. 5(4), 1680\u20131716 (2023)","journal-title":"Mach. Learn. Knowl. Extr."},{"key":"4871_CR29","doi-asserted-by":"crossref","unstructured":"Sapkota, R., Qureshi, R., Calero, M. F., Badjugar, C., Nepal, U., Poulose, A., Zeno, P., et al.: Yolov12 to its genesis: A decadal and comprehensive review of the you only look once (yolo) series. arXiv preprint arXiv:2406.19407 (2024).","DOI":"10.20944\/preprints202406.1366.v1"},{"issue":"1","key":"4871_CR30","doi-asserted-by":"publisher","first-page":"25","DOI":"10.1016\/S0045-7906(01)00011-8","volume":"29","author":"RS Stankovi\u0107","year":"2003","unstructured":"Stankovi\u0107, R.S., Falkowski, B.J.: The haar wavelet transform: its status and achievements. Comput. Electr. Eng. 29(1), 25\u201344 (2003)","journal-title":"Comput. Electr. Eng."},{"key":"4871_CR31","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.109819","volume":"143","author":"G Xu","year":"2023","unstructured":"Xu, G., Liao, W., Zhang, X., Li, C., He, X., Wu, X.: Haar wavelet downsampling: a simple but effective downsampling module for semantic segmentation. Pattern Recognit. 143, 109819 (2023). https:\/\/doi.org\/10.1016\/j.patcog.2023.109819","journal-title":"Pattern Recognit."},{"key":"4871_CR32","doi-asserted-by":"publisher","unstructured":"Yu, Z., Huang, H., Chen, W., Su, Y., Liu, Y., Wang, X.: YOLO-FaceV2: A scale and occlusion aware face detector, arXiv:2208.02019 (2022). https:\/\/doi.org\/10.1016\/j.patcog.2024.110714","DOI":"10.1016\/j.patcog.2024.110714"},{"key":"4871_CR33","unstructured":"Siliang, M., Yong, X.: MPDIoU: A loss for efficient and accurate bounding box regression, arXiv:2307.07662 (2023)."},{"key":"4871_CR34","unstructured":"Zhang, H., Xu, C., Zhang, S.: Inner-IoU: More effective intersection over union loss with auxiliary bounding box, arXiv:2311.02877 (2023)."},{"key":"4871_CR35","unstructured":"Boureau, Y.-L., Ponce, J., LeCun, Y.: A theoretical analysis of feature pooling in visual recognition, In: Proceedings of The 27th International Conference on Artificial Intelligence and Statistics, pp. 111\u2013118 (2010)."},{"key":"4871_CR36","doi-asserted-by":"crossref","unstructured":"Hu, J., Shen, L., Sun, G.: Squeeze-and-excitation networks, In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7132\u20137141 (2018).","DOI":"10.1109\/CVPR.2018.00745"},{"key":"4871_CR37","unstructured":"Hendrycks, D., Gimpel, K.: Gaussian error linear units (GELUs), arXiv:1606.08415 (2016)."},{"key":"4871_CR38","doi-asserted-by":"publisher","unstructured":"Chen, C., Liu, M.-Y., Tuzel, O., Xiao, J.: R-CNN for small object detection, In: Computer Vision \u2013 ACCV 2016, eds. S.-H. Lai et al., Lecture Notes in Computer Science (Springer, Cham, 2017), pp. 214\u2013230. https:\/\/doi.org\/10.1007\/978-3-319-54193-8_14","DOI":"10.1007\/978-3-319-54193-8_14"},{"key":"4871_CR39","doi-asserted-by":"publisher","unstructured":"Hore, A., Ziou,D.: Image quality metrics: PSNR vs. SSIM, In: Proceedings of the 2010\u00a020th International Conference on Pattern Recognition, pp. 2366\u20132369 (2010). https:\/\/doi.org\/10.1109\/ICPR.2010.579","DOI":"10.1109\/ICPR.2010.579"},{"key":"4871_CR40","unstructured":"Gevorgyan, Z.: SIoU loss: More powerful learning for bounding box regression, arXiv:2205.12740 (2022)."},{"key":"4871_CR41","unstructured":"Tong, Z., Chen, Y., Xu, Z., Yu, R.: Wise-IoU: Bounding box regression loss with dynamic focusing mechanism, arXiv:2301.10051 (2023)."},{"key":"4871_CR42","doi-asserted-by":"crossref","unstructured":"Rezatofighi, H., Tsoi, N., Gwak, J., Sadeghian, A., Reid, I., Savarese,S.: Generalized intersection over union: A metric and a loss for bounding box regression, In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 658\u2013666 (2019).","DOI":"10.1109\/CVPR.2019.00075"}],"container-title":["Signal, Image and Video Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-025-04871-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11760-025-04871-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-025-04871-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,23]],"date-time":"2025-11-23T12:14:10Z","timestamp":1763900050000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11760-025-04871-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,21]]},"references-count":42,"journal-issue":{"issue":"15","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["4871"],"URL":"https:\/\/doi.org\/10.1007\/s11760-025-04871-5","relation":{},"ISSN":["1863-1703","1863-1711"],"issn-type":[{"value":"1863-1703","type":"print"},{"value":"1863-1711","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10,21]]},"assertion":[{"value":"5 June 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 September 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 September 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 October 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interests"}}],"article-number":"1281"}}