{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T12:13:15Z","timestamp":1775131995748,"version":"3.50.1"},"reference-count":33,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2026,2,3]],"date-time":"2026-02-03T00:00:00Z","timestamp":1770076800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,2,3]],"date-time":"2026-02-03T00:00:00Z","timestamp":1770076800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"Natural Science Foundation of China","doi-asserted-by":"crossref","award":["62302501"],"award-info":[{"award-number":["62302501"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"Natural Science Foundation of China","doi-asserted-by":"crossref","award":["62302501"],"award-info":[{"award-number":["62302501"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"Natural Science Foundation of China","doi-asserted-by":"crossref","award":["62302501"],"award-info":[{"award-number":["62302501"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"Natural Science Foundation of China","doi-asserted-by":"crossref","award":["62302501"],"award-info":[{"award-number":["62302501"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"Natural Science Foundation of China","doi-asserted-by":"crossref","award":["62302501"],"award-info":[{"award-number":["62302501"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"Natural Science Foundation of China","doi-asserted-by":"crossref","award":["62302501"],"award-info":[{"award-number":["62302501"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"Natural Science Foundation of China","doi-asserted-by":"crossref","award":["62302501"],"award-info":[{"award-number":["62302501"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"Natural Science Foundation of China","doi-asserted-by":"crossref","award":["62302501"],"award-info":[{"award-number":["62302501"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"Natural Science Foundation of China","doi-asserted-by":"crossref","award":["62302501"],"award-info":[{"award-number":["62302501"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"name":"National Social Science Fund of China","award":["23CXW012"],"award-info":[{"award-number":["23CXW012"]}]},{"name":"National Social Science Fund of China","award":["23CXW012"],"award-info":[{"award-number":["23CXW012"]}]},{"name":"National Social Science Fund of China","award":["23CXW012"],"award-info":[{"award-number":["23CXW012"]}]},{"name":"National Social Science Fund of China","award":["23CXW012"],"award-info":[{"award-number":["23CXW012"]}]},{"name":"National Social Science Fund of China","award":["23CXW012"],"award-info":[{"award-number":["23CXW012"]}]},{"name":"National Social Science Fund of China","award":["23CXW012"],"award-info":[{"award-number":["23CXW012"]}]},{"name":"National Social Science Fund of China","award":["23CXW012"],"award-info":[{"award-number":["23CXW012"]}]},{"name":"National Social Science Fund of China","award":["23CXW012"],"award-info":[{"award-number":["23CXW012"]}]},{"name":"National Social Science Fund of China","award":["23CXW012"],"award-info":[{"award-number":["23CXW012"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2026,4]]},"DOI":"10.1007\/s00530-025-02176-8","type":"journal-article","created":{"date-parts":[[2026,2,3]],"date-time":"2026-02-03T03:43:44Z","timestamp":1770090224000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Multi-view and spatial-correlation interaction for multi-scale object detection"],"prefix":"10.1007","volume":"32","author":[{"given":"Yike","family":"Yang","sequence":"first","affiliation":[]},{"given":"Zhaohui","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Zekun","family":"Li","sequence":"additional","affiliation":[]},{"given":"Peidong","family":"He","sequence":"additional","affiliation":[]},{"given":"Ziqi","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Yaqi","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Yuan","family":"Ma","sequence":"additional","affiliation":[]},{"given":"Bing","family":"Li","sequence":"additional","affiliation":[]},{"given":"Yang","family":"Bai","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,2,3]]},"reference":[{"key":"2176_CR1","doi-asserted-by":"crossref","unstructured":"Li, Y., Chen, Y., Wang, N., Zhang, Z.-X.: Scale-aware trident networks for object detection. In: 2019 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 6054\u20136063 (2019)","DOI":"10.1109\/ICCV.2019.00615"},{"key":"2176_CR2","doi-asserted-by":"crossref","unstructured":"Pathiraja, B., Gunawardhana, M., Khan, M.H.: Multiclass confidence and localization calibration for object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19734\u201319743 (2023)","DOI":"10.1109\/CVPR52729.2023.01890"},{"key":"2176_CR3","doi-asserted-by":"crossref","unstructured":"Zohar, O., Wang, K.-C., Yeung, S.: Prob: Probabilistic objectness for open world object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11444\u201311453 (2023)","DOI":"10.1109\/CVPR52729.2023.01101"},{"key":"2176_CR4","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Doll\u00e1r, P., Girshick, R., He, K., Hariharan, B., Belongie, S.: Feature pyramid networks for object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2117\u20132125 (2017)","DOI":"10.1109\/CVPR.2017.106"},{"key":"2176_CR5","doi-asserted-by":"crossref","unstructured":"Liu, S., Qi, L., Qin, H., Shi, J., Jia, J.: Path aggregation network for instance segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 8759\u20138768 (2018)","DOI":"10.1109\/CVPR.2018.00913"},{"key":"2176_CR6","doi-asserted-by":"crossref","unstructured":"Pang, J., Chen, K., Shi, J., Feng, H., Ouyang, W., Lin, D.: Libra r-cnn: Towards balanced learning for object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 821\u2013830 (2019)","DOI":"10.1109\/CVPR.2019.00091"},{"key":"2176_CR7","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Maire, M., Belongie, S.J., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., Zitnick, C.L.: Microsoft coco: Common objects in context. In: European Conference on Computer Vision, pp. 740\u2013755 (2014)","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"2176_CR8","doi-asserted-by":"crossref","unstructured":"Girshick, R.: Fast r-cnn. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1440\u20131448 (2015)","DOI":"10.1109\/ICCV.2015.169"},{"key":"2176_CR9","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster r-cnn: Towards real-time object detection with region proposal networks. In: Advances in Neural Information Processing Systems, pp. 91\u201399 (2015)"},{"key":"2176_CR10","doi-asserted-by":"crossref","unstructured":"Redmon, J., Divvala, S., Girshick, R., Farhadi, A.: You only look once: Unified, real-time object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 779\u2013788 (2016)","DOI":"10.1109\/CVPR.2016.91"},{"key":"2176_CR11","doi-asserted-by":"crossref","unstructured":"Liu, W., Anguelov, D., Erhan, D., Szegedy, C., Reed, S., Fu, C.-Y., Berg, A.C.: Ssd: Single shot multibox detector. In: European Conference on Computer Vision, pp. 21\u201337 (2016). Springer","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"2176_CR12","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Goyal, P., Girshick, R., He, K., Doll\u00e1r, P.: Focal loss for dense object detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2980\u20132988 (2017)","DOI":"10.1109\/ICCV.2017.324"},{"key":"2176_CR13","doi-asserted-by":"crossref","unstructured":"Tian, Z., Shen, C., Chen, H., He, T.: Fcos: Fully convolutional one-stage object detection. In: 2019 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 9626\u20139635 (2019)","DOI":"10.1109\/ICCV.2019.00972"},{"key":"2176_CR14","doi-asserted-by":"crossref","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: European Conference on Computer Vision, pp. 213\u2013229 (2020). Springer","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"2176_CR15","doi-asserted-by":"crossref","unstructured":"Wang, W., Xie, E., Li, X., Fan, D.-P., Song, K., Liang, D., Lu, T., Luo, P., Shao, L.: Pyramid vision transformer: A versatile backbone for dense prediction without convolutions. arXiv preprint arXiv:2102.12122 (2021)","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"2176_CR16","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lin, Y., Cao, Y., Hu, H., Wei, Y., Zhang, Z., Lin, S., Guo, B.: Swin transformer: Hierarchical vision transformer using shifted windows. arXiv preprint arXiv:2103.14030 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"2176_CR17","doi-asserted-by":"crossref","unstructured":"Wu, H., Xiao, B., Codella, N., Liu, M., Dai, X., Yuan, L., Zhang, L.: Cvt: Introducing convolutions to vision transformers. arXiv preprint arXiv:2103.15808 (2021)","DOI":"10.1109\/ICCV48922.2021.00009"},{"key":"2176_CR18","unstructured":"Chen, T., Kornblith, S., Norouzi, M., Hinton, G.: A simple framework for contrastive learning of visual representations. In: International Conference on Machine Learning, pp. 1597\u20131607 (2020). PMLR"},{"key":"2176_CR19","doi-asserted-by":"crossref","unstructured":"He, K., Fan, H., Wu, Y., Xie, S., Girshick, R.: Momentum contrast for unsupervised visual representation learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9729\u20139738 (2020)","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"2176_CR20","doi-asserted-by":"crossref","unstructured":"Guo, C., Fan, B., Zhang, Q., Xiang, S., Pan, C.: Augfpn: Improving multi-scale feature learning for object detection. In: CVPR 2020: Computer Vision and Pattern Recognition, pp. 12595\u201312604 (2020)","DOI":"10.1109\/CVPR42600.2020.01261"},{"key":"2176_CR21","first-page":"11131","volume":"33","author":"L Song","year":"2020","unstructured":"Song, L., Li, Y., Jiang, Z., Li, Z., Sun, H., Sun, J., Zheng, N.: Fine-grained dynamic head for object detection. Adv. Neural. Inf. Process. Syst. 33, 11131\u201311141 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2176_CR22","doi-asserted-by":"crossref","unstructured":"Hu, M., Li, Y., Fang, L., Wang, S.: A2-fpn: Attention aggregation based feature pyramid network for instance segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15343\u201315352 (2021)","DOI":"10.1109\/CVPR46437.2021.01509"},{"key":"2176_CR23","doi-asserted-by":"crossref","unstructured":"Li, Z., Liu, Y., Li, B., Hu, W., Zhang, H.: Dsic: Dynamic sample-individualized connector for multi-scale object detection. In: 2021 IEEE International Conference on Multimedia and Expo (ICME) (2021)","DOI":"10.1109\/ICME51207.2021.9428167"},{"key":"2176_CR24","doi-asserted-by":"crossref","unstructured":"Park, J., Kahatapitiya, K., Kim, D., Sudalairaj, S., Fan, Q., Ryoo, M.S.: Grafting vision transformers. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 1145\u20131154 (2024)","DOI":"10.1109\/WACV57701.2024.00118"},{"key":"2176_CR25","unstructured":"Zhu, X., Su, W., Lu, L., Li, B., Wang, X., Dai, J.: Deformable detr: Deformable transformers for end-to-end object detection. arXiv preprint arXiv:2010.04159 (2020)"},{"issue":"9","key":"2176_CR26","doi-asserted-by":"publisher","first-page":"6160","DOI":"10.1109\/TCSVT.2022.3162069","volume":"32","author":"Z Li","year":"2022","unstructured":"Li, Z., Liu, Y., Li, B., Feng, B., Wu, K., Peng, C., Hu, W.: Sdtp: semantic-aware decoupled transformer pyramid for dense image prediction. IEEE Trans. Circ. Syst. Video Technol. 32(9), 6160\u20136173 (2022)","journal-title":"IEEE Trans. Circ. Syst. Video Technol."},{"key":"2176_CR27","unstructured":"Chen, K., Wang, J., Pang, J., Cao, Y., Xiong, Y., Li, X., Sun, S., Feng, W., Liu, Z., Xu, J., et al.: Mmdetection: Open mmlab detection toolbox and benchmark. arXiv preprint arXiv:1906.07155 (2019)"},{"key":"2176_CR28","doi-asserted-by":"crossref","unstructured":"Lu, X., Li, B., Yue, Y., Li, Q., Yan, J.: Grid r-cnn. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7363\u20137372 (2019)","DOI":"10.1109\/CVPR.2019.00754"},{"key":"2176_CR29","doi-asserted-by":"crossref","unstructured":"Chen, K., Pang, J., Wang, J., Xiong, Y., Li, X., Sun, S., Feng, W., Liu, Z., Shi, J., Ouyang, W., et al.: Hybrid task cascade for instance segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4974\u20134983 (2019)","DOI":"10.1109\/CVPR.2019.00511"},{"key":"2176_CR30","doi-asserted-by":"crossref","unstructured":"Cai, Z., Vasconcelos, N.: Cascade r-cnn: Delving into high quality object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6154\u20136162 (2018)","DOI":"10.1109\/CVPR.2018.00644"},{"key":"2176_CR31","doi-asserted-by":"crossref","unstructured":"Sun, P., Zhang, R., Jiang, Y., Kong, T., Xu, C., Zhan, W., Tomizuka, M., Li, L., Yuan, Z., Wang, C., et al.: Sparse r-cnn: End-to-end object detection with learnable proposals. arXiv preprint arXiv:2011.12450 (2020)","DOI":"10.1109\/CVPR46437.2021.01422"},{"key":"2176_CR32","doi-asserted-by":"crossref","unstructured":"Zhang, S., Chi, C., Yao, Y., Lei, Z., Li, S.Z.: Bridging the gap between anchor-based and anchor-free detection via adaptive training sample selection. In: CVPR 2020: Computer Vision and Pattern Recognition, pp. 9759\u20139768 (2020)","DOI":"10.1109\/CVPR42600.2020.00978"},{"key":"2176_CR33","doi-asserted-by":"crossref","unstructured":"Ghiasi, G., Lin, T.-Y., Le, Q.V.: Nas-fpn: Learning scalable feature pyramid architecture for object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7036\u20137045 (2019)","DOI":"10.1109\/CVPR.2019.00720"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-02176-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-025-02176-8","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-025-02176-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T11:35:49Z","timestamp":1775129749000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-025-02176-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2,3]]},"references-count":33,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2026,4]]}},"alternative-id":["2176"],"URL":"https:\/\/doi.org\/10.1007\/s00530-025-02176-8","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,2,3]]},"assertion":[{"value":"30 July 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 December 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 February 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"95"}}