{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T04:37:38Z","timestamp":1773203858526,"version":"3.50.1"},"reference-count":42,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2024,8,29]],"date-time":"2024-08-29T00:00:00Z","timestamp":1724889600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,8,29]],"date-time":"2024-08-29T00:00:00Z","timestamp":1724889600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"the National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["32102598"],"award-info":[{"award-number":["32102598"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Project of Postgraduate Scientific Research Innovation Practice Activity of Jiangsu Province","award":["SJCX24_2390"],"award-info":[{"award-number":["SJCX24_2390"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Real-Time Image Proc"],"published-print":{"date-parts":[[2024,10]]},"DOI":"10.1007\/s11554-024-01540-7","type":"journal-article","created":{"date-parts":[[2024,8,29]],"date-time":"2024-08-29T16:02:36Z","timestamp":1724947356000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["ESC-YOLO: optimizing apple fruit recognition with efficient spatial and channel features in YOLOX"],"prefix":"10.1007","volume":"21","author":[{"given":"Jun","family":"Sun","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yifei","family":"Peng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chen","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bing","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhaoqi","family":"Wu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yilin","family":"Jia","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lei","family":"Shi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,8,29]]},"reference":[{"key":"1540_CR1","doi-asserted-by":"publisher","DOI":"10.1016\/j.compag.2020.105687","volume":"177","author":"L Fu","year":"2020","unstructured":"Fu, L., Gao, F., Wu, J., Li, R., Karkee, M., Zhang, Q.: Application of consumer RGB-D cameras for fruit detection and localization in field: a critical review. Comput. Electron. Agric. 177, 105687 (2020)","journal-title":"Comput. Electron. Agric."},{"key":"1540_CR2","doi-asserted-by":"publisher","DOI":"10.1016\/j.compag.2020.105606","volume":"175","author":"Z Zhang","year":"2020","unstructured":"Zhang, Z., Igathinathane, C., Li, J., Cen, H., Lu, Y., Flores, P.: Technology progress in mechanical harvest of fresh market apples. Comput. Electron. Agric. 175, 105606 (2020)","journal-title":"Comput. Electron. Agric."},{"issue":"10","key":"1540_CR3","first-page":"59","volume":"33","author":"D Wang","year":"2017","unstructured":"Wang, D., Song, H., He, D.: Research advance on vision system of apple picking robot. Trans. Chin. Soc. Agric. Eng. 33(10), 59\u201369 (2017)","journal-title":"Trans. Chin. Soc. Agric. Eng."},{"issue":"17","key":"1540_CR4","doi-asserted-by":"publisher","first-page":"4150","DOI":"10.3390\/rs14174150","volume":"14","author":"C Zhang","year":"2022","unstructured":"Zhang, C., Kang, F., Wang, Y.: An improved apple object detection method based on lightweight YOLOv4 in complex backgrounds. Remote Sens. 14(17), 4150\u20134150 (2022)","journal-title":"Remote Sens."},{"key":"1540_CR5","doi-asserted-by":"publisher","DOI":"10.1016\/j.atech.2023.100345","volume":"6","author":"LG Divyanth","year":"2023","unstructured":"Divyanth, L.G., Rathore, D., Senthilkumar, P., Patidar, P., Zhang, X., Karkee, M., Machavaram, R., Soni, P.: Estimating depth from RGB images using deep-learning for robotic applications in apple orchards. Smart Agric. Technol. 6, 100345 (2023)","journal-title":"Smart Agric. Technol."},{"issue":"3","key":"1540_CR6","doi-asserted-by":"publisher","first-page":"476","DOI":"10.3390\/agronomy11030476","volume":"11","author":"L Wu","year":"2021","unstructured":"Wu, L., Ma, J., Zhao, Y., Liu, H.: Apple detection in complex scene using the improved YOLOv4 model. Agronomy 11(3), 476 (2021)","journal-title":"Agronomy"},{"key":"1540_CR7","doi-asserted-by":"publisher","DOI":"10.1016\/j.compag.2022.106705","volume":"193","author":"J Sun","year":"2022","unstructured":"Sun, J., Yang, K., Chen, C., Shen, J., Yang, Y., Wu, X., Tomas, N.: Wheat head counting in the wild by an augmented feature pyramid networks-based convolutional neural network. Comput. Electron. Agric. 193, 106705 (2022)","journal-title":"Comput. Electron. Agric."},{"key":"1540_CR8","doi-asserted-by":"crossref","unstructured":"Liu, W., Anguelov, D., Erhan, D., Szegedy, C., Reed, S., Fu, C.-Y., Berg, A.C.: SSD: single shot multibox detector. In: Computer Vision-ECCV 2016: 14th European Conference, Amsterdam, Proceedings, Part I, pp. 21\u201337. The Netherlands, October 11\u201314 (2016)","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"1540_CR9","unstructured":"Bochkovskiy, A., Wang, C.-Y., Liao, H.-Y.M.: YOLOv4: optimal speed and accuracy of object detection (2020). arXiv preprint arXiv:2004.10934"},{"key":"1540_CR10","unstructured":"Li, C., Li, L., Jiang, H., Weng, K., Geng, Y., Li, L., Ke, Z., Li, Q., Cheng, M., Nie, W., Li, Y., Zhang, B., Liang, Y., Zhou, L., Xu, X., Chu, X., Wei, X., Wei, X.: YOLOv6: a single-stage object detection framework for industrial applications (2022). arXiv preprint, arXiv:2209.02976"},{"key":"1540_CR11","unstructured":"Redmon, J., Farhadi, A.: YOLOv3: an incremental improvement. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. Salt Lake, USA (2018)"},{"key":"1540_CR12","doi-asserted-by":"crossref","unstructured":"Wang, C.-Y., Bochkovskiy, A., Liao, H.-Y. M.: YOLOv7: trainable bag-of-freebies sets new state-of-the-art for real-time object detectors. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7464\u20137475 (2023)","DOI":"10.1109\/CVPR52729.2023.00721"},{"key":"1540_CR13","doi-asserted-by":"crossref","unstructured":"Girshick, R., Donahue, J., Darrell, T., Malik, J.: Rich feature hierarchies for accurate object detection and semantic segmentation. In: 2014 IEEE Conference on Computer Vision and Pattern Recognition, pp. 580\u2013587 (2014)","DOI":"10.1109\/CVPR.2014.81"},{"key":"1540_CR14","doi-asserted-by":"crossref","unstructured":"Girshick, R.: Fast R-CNN. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1440\u20131448 (2015)","DOI":"10.1109\/ICCV.2015.169"},{"issue":"6","key":"1540_CR15","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","volume":"39","author":"S Ren","year":"2015","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. IEEE Trans. Pattern Anal. Mach. Intell. 39(6), 1137\u20131149 (2015)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1540_CR16","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R.: Mask R-CNN. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2961\u20132969 (2018)","DOI":"10.1109\/ICCV.2017.322"},{"key":"1540_CR17","unstructured":"Ge, Z., Liu, S., Wang, F., Li, Z., Sun, J.: YOLOX: exceeding YOLO series in 2021 (2021). arXiv preprint arXiv:2107.08430"},{"key":"1540_CR18","doi-asserted-by":"crossref","unstructured":"Liu, S., Qi, L., Qin, H., Shi, J., Jia, J.: Path aggregation network for instance segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 8759\u20138768 (2018)","DOI":"10.1109\/CVPR.2018.00913"},{"key":"1540_CR19","doi-asserted-by":"crossref","unstructured":"Liu, X., Peng, H., Zheng, N., Yang, Y., Hu, H., Yuan, Y.: EfficientViT: memory efficient vision transformer with cascaded group attention. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14420\u201314430 (2023)","DOI":"10.1109\/CVPR52729.2023.01386"},{"key":"1540_CR20","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"1540_CR21","doi-asserted-by":"crossref","unstructured":"Li, J., Wen, Y., He, L.: SCConv: spatial and channel reconstruction convolution for feature redundancy. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2023)","DOI":"10.1109\/CVPR52729.2023.00596"},{"key":"1540_CR22","unstructured":"Xu, W., Wan, Y.: ELA: efficient local attention for deep convolutional neural networks (2024). arXiv preprint, arXiv:2403.01123"},{"key":"1540_CR23","doi-asserted-by":"crossref","unstructured":"Sandler, M., Howard, A., Zhu, M., Zhmoginov, A., Chen, L.-C.: MobileNetV2: inverted residuals and linear bottlenecks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4510\u20134520 (2018)","DOI":"10.1109\/CVPR.2018.00474"},{"key":"1540_CR24","unstructured":"Tan, M., Le, Q.: EfficientNet: rethinking model scaling for convolutional neural networks. In: Proceedings of the 36th International Conference on Machine Learning, PMLR, vol. 97, pp. 6105\u20136114 (2019)"},{"key":"1540_CR25","doi-asserted-by":"publisher","first-page":"4341","DOI":"10.1109\/TIP.2023.3297408","volume":"32","author":"Y Quan","year":"2022","unstructured":"Quan, Y., Zhang, D., Zhang, L., Tang, J.: Centralized feature pyramid for object detection. IEEE Trans. Image Process. 32, 4341\u20134354 (2022)","journal-title":"IEEE Trans. Image Process."},{"key":"1540_CR26","unstructured":"Yang, L., Zhang, R.-Y., Li, L., Xie, X.: SimAM: a simple, parameter-free attention module for convolutional neural networks. In: Proceedings of the 38th International Conference on Machine Learning, pp. 11863\u201311874 (2021)"},{"key":"1540_CR27","doi-asserted-by":"crossref","unstructured":"Rezatofighi, H., Tsoi, N., Gwak, J., Sadeghian, A., Reid, I., Savarese, S.: Generalized intersection over union: a metric and a loss for bounding box regression. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 658\u2013666 (2019)","DOI":"10.1109\/CVPR.2019.00075"},{"key":"1540_CR28","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization (2017). arXiv preprint, arXiv:1412.6980"},{"key":"1540_CR29","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., Uszkoreit, J., Houlsby, N.: An image is worth 16x16 words: transformers for image recognition at scale (2020). arXiv preprint, arXiv:2010.11929"},{"key":"1540_CR30","doi-asserted-by":"crossref","unstructured":"Liu, Z., Mao, H., Wu, C.-Y., Feichtenhofer, C., Darrell, T., Xie, S.: A ConvNet for the 2020s. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 11976\u201311986 (2022)","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"1540_CR31","unstructured":"Tan, M., Le, Q.: EfficientNetV2: smaller models and faster training. In: Proceedings of the 38th International Conference on Machine Learning, PMLR, vol. 139, pp. 10096\u201310106 (2021)"},{"key":"1540_CR32","doi-asserted-by":"crossref","unstructured":"Vasu, P.K.A., Gabriel, J., Zhu, J., Tuzel, O., Ranjan, A.: MobileOne: an improved one millisecond mobile backbone. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 7907\u20137917 (2023)","DOI":"10.1109\/CVPR52729.2023.00764"},{"key":"1540_CR33","doi-asserted-by":"crossref","unstructured":"Wang, A., Chen, H., Lin, Z., Han, J., Ding, G.: RepViT: revisiting mobile CNN from ViT perspective. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 15909\u201315920 (2024)","DOI":"10.1109\/CVPR52733.2024.01506"},{"key":"1540_CR34","doi-asserted-by":"crossref","unstructured":"Xie, S., Girshick, R., Dollar, P., Tu, Z., He, K.: Aggregated residual transformations for deep neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 1492\u20131500 (2017)","DOI":"10.1109\/CVPR.2017.634"},{"key":"1540_CR35","doi-asserted-by":"crossref","unstructured":"Wu, Y., He, K.: Group normalization. In: Proceedings of the European Conference on Computer Vision, pp. 3\u201319 (2018)","DOI":"10.1007\/978-3-030-01261-8_1"},{"key":"1540_CR36","doi-asserted-by":"crossref","unstructured":"Hu, J., Shen, L., Sun, G.: Squeeze-and-excitation networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7132\u20137141 (2018)","DOI":"10.1109\/CVPR.2018.00745"},{"key":"1540_CR37","unstructured":"Howard, A.G., Zhu, M., Chen, B., Kalenichenko, D., Wang, W., Weyand, T., Andreetto, M., Adam, H.: MobileNets: efficient convolutional neural networks for mobile vision applications. arXiv preprint, arXiv:1704.04861 (2017)"},{"key":"1540_CR38","unstructured":"Tolstikhin, I., Houlsby, N., Kolesnikov, A., Beyer, L., Zhai, X., Unterthiner, T., Yung, J., Keysers, D., Uszkoreit, J., Lucim, M.:. MLP-mixer: an all-MLP architecture for vision (2021). arXiv preprint, arXiv:2105.01601"},{"key":"1540_CR39","doi-asserted-by":"crossref","unstructured":"Hou, Q., Zhou, D., Feng, J.: Coordinate attention for efficient mobile network design. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2021)","DOI":"10.1109\/CVPR46437.2021.01350"},{"key":"1540_CR40","doi-asserted-by":"crossref","unstructured":"Woo, S., Park, J., Lee, J.-Y., Kweon, I.S.: CBAM: convolutional block attention module. In: Proceedings of the European Conference on Computer Vision, pp. 3\u201319 (2018)","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"1540_CR41","unstructured":"Wang, C., He, W., Nie, Y., Guo, J., Liu, C., Han, K., Wang, Y.: Gold-YOLO: efficient object detector via gather-and-distribute mechanism. In: Advances in Neural Information Processing Systems, vol. 36 (2023)"},{"key":"1540_CR42","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Lv, W., Xu, S., Wei, J., Wang, G., Dang, Q., Liu, Y., Chen, J.: DETRs beat YOLOs on real-time object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 16965\u201316974 (2024)","DOI":"10.1109\/CVPR52733.2024.01605"}],"container-title":["Journal of Real-Time Image Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11554-024-01540-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11554-024-01540-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11554-024-01540-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,15]],"date-time":"2024-10-15T15:20:40Z","timestamp":1729005640000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11554-024-01540-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,29]]},"references-count":42,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2024,10]]}},"alternative-id":["1540"],"URL":"https:\/\/doi.org\/10.1007\/s11554-024-01540-7","relation":{},"ISSN":["1861-8200","1861-8219"],"issn-type":[{"value":"1861-8200","type":"print"},{"value":"1861-8219","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,8,29]]},"assertion":[{"value":"26 June 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 August 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 August 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interests"}}],"article-number":"162"}}