{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,14]],"date-time":"2026-02-14T08:28:32Z","timestamp":1771057712900,"version":"3.50.1"},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2024,8,28]],"date-time":"2024-08-28T00:00:00Z","timestamp":1724803200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,8,28]],"date-time":"2024-08-28T00:00:00Z","timestamp":1724803200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100003392","name":"Natural Science Foundation of Fujian Province","doi-asserted-by":"crossref","award":["2023J011437"],"award-info":[{"award-number":["2023J011437"]}],"id":[{"id":"10.13039\/501100003392","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100003392","name":"Natural Science Foundation of Fujian Province","doi-asserted-by":"crossref","award":["2023J011437"],"award-info":[{"award-number":["2023J011437"]}],"id":[{"id":"10.13039\/501100003392","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Education and Research Foundation of Fujian Province","award":["JAT200464"],"award-info":[{"award-number":["JAT200464"]}]},{"name":"Education and Research Foundation of Fujian Province","award":["JAT200464"],"award-info":[{"award-number":["JAT200464"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Vis Comput"],"published-print":{"date-parts":[[2025,3]]},"DOI":"10.1007\/s00371-024-03602-0","type":"journal-article","created":{"date-parts":[[2024,8,28]],"date-time":"2024-08-28T20:17:21Z","timestamp":1724876241000},"page":"3283-3295","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Lightweight CNN-ViT with cross-module representational constraint for express parcel detection"],"prefix":"10.1007","volume":"41","author":[{"given":"Guowei","family":"Zhang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wuzhi","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yutong","family":"Tang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shuixuan","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Li","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,8,28]]},"reference":[{"key":"3602_CR1","doi-asserted-by":"crossref","unstructured":"Liu, Q., Wu, J., Yin, L., Wu, W., Shen, Z.: Real-time detection and tracking of express parcels based on improved YOLOv5+ deepsort. In: International Conference on Intelligent Robotics and Applications, pp. 3\u201314. Springer (2023)","DOI":"10.1007\/978-981-99-6495-6_1"},{"issue":"17","key":"3602_CR2","doi-asserted-by":"publisher","first-page":"6705","DOI":"10.3390\/s22176705","volume":"22","author":"X Xu","year":"2022","unstructured":"Xu, X., Xue, Z., Zhao, Y.: Research on an algorithm of express parcel sorting based on deeper learning and multi-information recognition. Sensors 22(17), 6705 (2022)","journal-title":"Sensors"},{"issue":"2","key":"3602_CR3","doi-asserted-by":"publisher","first-page":"973","DOI":"10.1007\/s12652-021-03350-2","volume":"14","author":"A Ding","year":"2023","unstructured":"Ding, A., Zhang, Y., Zhu, L., Li, H., Huang, L.: Intelligent recognition of rough handling of express parcels based on CNN-GRU with the channel attention mechanism. J. Ambient Intell. Humaniz. Comput. 14(2), 973\u2013990 (2023)","journal-title":"J. Ambient Intell. Humaniz. Comput."},{"key":"3602_CR4","unstructured":"Mehta, S., Rastegari, M.: Mobilevit: light-weight, general-purpose, and mobile-friendly vision transformer. arXiv preprint arXiv:2110.02178 (2021)"},{"key":"3602_CR5","unstructured":"Mehta, S., Rastegari, M.: Separable self-attention for mobile vision transformers. arXiv preprint arXiv:2206.02680 (2022)"},{"key":"3602_CR6","unstructured":"Wadekar, S.N., Chaurasia, A.: Mobilevitv3: mobile-friendly vision transformer with simple and effective fusion of local, global and input features. arXiv preprint arXiv:2209.15159 (2022)"},{"key":"3602_CR7","volume-title":"ultralytics\/yolov5: v7. 0-yolov5 Sota Realtime Instance Segmentation","author":"G Jocher","year":"2022","unstructured":"Jocher, G., Chaurasia, A., Stoken, A., Borovec, J., Kwon, Y., Michael, K., Fang, J., Yifu, Z., Wong, C., Montes, D., et al.: ultralytics\/yolov5: v7. 0-yolov5 Sota Realtime Instance Segmentation. Zenodo, Geneva (2022)"},{"key":"3602_CR8","doi-asserted-by":"crossref","unstructured":"Wang, C.-Y., Bochkovskiy, A., Liao, H.-Y.M.: Yolov7: trainable bag-of-freebies sets new state-of-the-art for real-time object detectors. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7464\u20137475 (2023)","DOI":"10.1109\/CVPR52729.2023.00721"},{"key":"3602_CR9","unstructured":"Jocher, G., Chaurasia, A., Qiu, J.: Ultralytics YOLO. https:\/\/github.com\/ultralytics\/ultralytics"},{"key":"3602_CR10","doi-asserted-by":"crossref","unstructured":"Tan, M., Chen, B., Pang, R., Vasudevan, V., Sandler, M., Howard, A., Le, Q.V.: Mnasnet: platform-aware neural architecture search for mobile. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2820\u20132828 (2019)","DOI":"10.1109\/CVPR.2019.00293"},{"key":"3602_CR11","doi-asserted-by":"crossref","unstructured":"Maaz, M., Shaker, A., Cholakkal, H., Khan, S., Zamir, S.W., Anwer, R.M., Shahbaz\u00a0Khan, F.: Edgenext: efficiently amalgamated CNN-transformer architecture for mobile vision applications. In: European Conference on Computer Vision, pp. 3\u201320. Springer (2022)","DOI":"10.1007\/978-3-031-25082-8_1"},{"key":"3602_CR12","unstructured":"Howard, A.G., Zhu, M., Chen, B., Kalenichenko, D., Wang, W., Weyand, T., Andreetto, M., Adam, H.: Mobilenets: efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861 (2017)"},{"key":"3602_CR13","doi-asserted-by":"crossref","unstructured":"Sandler, M., Howard, A., Zhu, M., Zhmoginov, A., Chen, L.-C.: Mobilenetv2: inverted residuals and linear bottlenecks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4510\u20134520 (2018)","DOI":"10.1109\/CVPR.2018.00474"},{"key":"3602_CR14","doi-asserted-by":"crossref","unstructured":"Howard, A., Sandler, M., Chu, G., Chen, L.-C., Chen, B., Tan, M., Wang, W., Zhu, Y., Pang, R., Vasudevan, V., et al.: Searching for mobilenetv3. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1314\u20131324 (2019)","DOI":"10.1109\/ICCV.2019.00140"},{"key":"3602_CR15","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"3602_CR16","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lin, Y., Cao, Y., Hu, H., Wei, Y., Zhang, Z., Lin, S., Guo, B.: Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"3602_CR17","doi-asserted-by":"crossref","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: European Conference on Computer Vision, pp. 213\u2013229. Springer (2020)","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"3602_CR18","doi-asserted-by":"crossref","unstructured":"Yao, J., Chen, J., Niu, L., Sheng, B.: Scene-aware human pose generation using transformer. In: Proceedings of the 31st ACM International Conference on Multimedia, pp. 2847\u20132855 (2023)","DOI":"10.1145\/3581783.3612439"},{"key":"3602_CR19","doi-asserted-by":"crossref","unstructured":"Dai, J., Qi, H., Xiong, Y., Li, Y., Zhang, G., Hu, H., Wei, Y.: Deformable convolutional networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 764\u2013773 (2017)","DOI":"10.1109\/ICCV.2017.89"},{"key":"3602_CR20","doi-asserted-by":"crossref","unstructured":"Wang, R., Shivanna, R., Cheng, D., Jain, S., Lin, D., Hong, L., Chi, E.: Dcn v2: improved deep & cross network and practical lessons for web-scale learning to rank systems. In: Proceedings of the Web Conference 2021, pp. 1785\u20131797 (2021)","DOI":"10.1145\/3442381.3450078"},{"issue":"8","key":"3602_CR21","doi-asserted-by":"publisher","first-page":"4499","DOI":"10.1109\/TNNLS.2021.3116209","volume":"34","author":"Z Xie","year":"2021","unstructured":"Xie, Z., Zhang, W., Sheng, B., Li, P., Chen, C.P.: BaGFN: broad attentive graph fusion network for high-order feature interactions. IEEE Trans. Neural Netw. Learn. Syst. 34(8), 4499\u20134513 (2021)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"3602_CR22","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., Zitnick, C.L.: Microsoft coco: common objects in context. In: Computer Vision\u2013ECCV 2014: 13th European Conference, Zurich, Switzerland, 6\u201312 September 2014, Proceedings, Part V 13, pp. 740\u2013755. Springer (2014)","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"3602_CR23","doi-asserted-by":"crossref","unstructured":"Ma, N., Zhang, X., Zheng, H.-T., Sun, J.: Shufflenet v2: practical guidelines for efficient CNN architecture design. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 116\u2013131 (2018)","DOI":"10.1007\/978-3-030-01264-9_8"},{"key":"3602_CR24","unstructured":"Zhu, X., Su, W., Lu, L., Li, B., Wang, X., Dai, J.: Deformable detr: deformable transformers for end-to-end object detection. arXiv preprint arXiv:2010.04159 (2020)"},{"key":"3602_CR25","doi-asserted-by":"crossref","unstructured":"Zhang, X., Zhou, X., Lin, M., Sun, J.: Shufflenet: an extremely efficient convolutional neural network for mobile devices. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6848\u20136856 (2018)","DOI":"10.1109\/CVPR.2018.00716"},{"key":"3602_CR26","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Lv, W., Xu, S., Wei, J., Wang, G., Dang, Q., Liu, Y., Chen, J.: DETRs beat YOLOs on real-time object detection (2023)","DOI":"10.1109\/CVPR52733.2024.01605"},{"key":"3602_CR27","doi-asserted-by":"crossref","unstructured":"Ding, X., Zhang, X., Han, J., Ding, G.: Scaling up your kernels to 31 \u00d7 31: revisiting large kernel design in CNNs. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11963\u201311975 (2022)","DOI":"10.1109\/CVPR52688.2022.01166"},{"key":"3602_CR28","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s00371-024-03422-2","volume":"24","author":"Y Guan","year":"2024","unstructured":"Guan, Y., Liao, S., Yang, W.: AParC-DETR: accelerate DETR training by introducing adaptive position-aware circular convolution. Vis. Comput. 24, 1\u201315 (2024). https:\/\/doi.org\/10.1007\/s00371-024-03422-2","journal-title":"Vis. Comput."},{"key":"3602_CR29","doi-asserted-by":"publisher","first-page":"50","DOI":"10.1109\/TMM.2021.3120873","volume":"25","author":"X Lin","year":"2021","unstructured":"Lin, X., Sun, S., Huang, W., Sheng, B., Li, P., Feng, D.D.: EAPT: efficient attention pyramid transformer for image processing. IEEE Trans. Multimed. 25, 50\u201361 (2021)","journal-title":"IEEE Trans. Multimed."},{"key":"3602_CR30","doi-asserted-by":"publisher","DOI":"10.1109\/TETCI.2024.3382233","author":"L Li","year":"2024","unstructured":"Li, L., Chen, Z., Dai, L., Li, R., Sheng, B.: MA-MFCNet: mixed attention-based multi-scale feature calibration network for image dehazing. IEEE Trans. Emerg. Top. Comput. Intell. (2024). https:\/\/doi.org\/10.1109\/TETCI.2024.3382233","journal-title":"IEEE Trans. Emerg. Top. Comput. Intell."},{"key":"3602_CR31","doi-asserted-by":"crossref","unstructured":"Cui, Y., Yan, L., Cao, Z., Liu, D.: Tf-blender: temporal feature blender for video object detection. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 8138\u20138147 (2021)","DOI":"10.1109\/ICCV48922.2021.00803"},{"key":"3602_CR32","doi-asserted-by":"crossref","unstructured":"Liu, D., Cui, Y., Tan, W., Chen, Y.: Sg-net: spatial granularity network for one-stage video instance segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9816\u20139825 (2021)","DOI":"10.1109\/CVPR46437.2021.00969"},{"key":"3602_CR33","doi-asserted-by":"crossref","unstructured":"Liu, D., Cui, Y., Yan, L., Mousas, C., Yang, B., Chen, Y.: Densernet: weakly supervised visual localization using multi-scale feature aggregation. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 35, pp. 6101\u20136109 (2021)","DOI":"10.1609\/aaai.v35i7.16760"},{"key":"3602_CR34","unstructured":"Liang, J., Zhou, T., Liu, D., Wang, W.: Clustseg: clustering for universal segmentation. arXiv preprint arXiv:2305.02187 (2023)"},{"key":"3602_CR35","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"issue":"2","key":"3602_CR36","doi-asserted-by":"publisher","first-page":"652","DOI":"10.1109\/TPAMI.2019.2938758","volume":"43","author":"S-H Gao","year":"2019","unstructured":"Gao, S.-H., Cheng, M.-M., Zhao, K., Zhang, X.-Y., Yang, M.-H., Torr, P.: Res2net: a new multi-scale backbone architecture. IEEE Trans. Pattern Anal. Mach. Intell. 43(2), 652\u2013662 (2019)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"3602_CR37","doi-asserted-by":"crossref","unstructured":"Woo, S., Park, J., Lee, J.-Y., Kweon, I.S.: CBAM: convolutional block attention module. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 3\u201319 (2018)","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"3602_CR38","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Doll\u00e1r, P., Girshick, R., He, K., Hariharan, B., Belongie, S.: Feature pyramid networks for object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2117\u20132125 (2017)","DOI":"10.1109\/CVPR.2017.106"},{"key":"3602_CR39","unstructured":"Li, H., Li, J., Wei, H., Liu, Z., Zhan, Z., Ren, Q.: Slim-neck by GSConv: a better design paradigm of detector architectures for autonomous vehicles. arXiv preprint arXiv:2206.02424 (2022)"},{"key":"3602_CR40","unstructured":"Loshchilov, I., Hutter, F.: Sgdr: stochastic gradient descent with warm restarts. arXiv preprint arXiv:1608.03983 (2016)"},{"key":"3602_CR41","unstructured":"Wang, A., Chen, H., Liu, L., Chen, K., Lin, Z., Han, J., Ding, G.: Yolov10: real-time end-to-end object detection. arXiv preprint arXiv:2405.14458 (2024)"},{"key":"3602_CR42","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Lv, W., Xu, S., Wei, J., Wang, G., Dang, Q., Liu, Y., Chen, J.: Detrs beat yolos on real-time object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16965\u201316974 (2024)","DOI":"10.1109\/CVPR52733.2024.01605"},{"key":"3602_CR43","unstructured":"Tan, M., Le, Q.V.: Mixconv: mixed depthwise convolutional kernels. arXiv preprint arXiv:1907.09595 (2019)"}],"container-title":["The Visual Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-024-03602-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00371-024-03602-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-024-03602-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,10]],"date-time":"2025-03-10T09:09:31Z","timestamp":1741597771000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00371-024-03602-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,28]]},"references-count":43,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2025,3]]}},"alternative-id":["3602"],"URL":"https:\/\/doi.org\/10.1007\/s00371-024-03602-0","relation":{},"ISSN":["0178-2789","1432-2315"],"issn-type":[{"value":"0178-2789","type":"print"},{"value":"1432-2315","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,8,28]]},"assertion":[{"value":"5 August 2024","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 August 2024","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}