{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,26]],"date-time":"2025-11-26T16:46:31Z","timestamp":1764175591435,"version":"3.37.3"},"reference-count":70,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2024,5,7]],"date-time":"2024-05-07T00:00:00Z","timestamp":1715040000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,5,7]],"date-time":"2024-05-07T00:00:00Z","timestamp":1715040000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["12101289","11871259","62076221"],"award-info":[{"award-number":["12101289","11871259","62076221"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003392","name":"Natural Science Foundation of Fujian Province","doi-asserted-by":"publisher","award":["2022J01891"],"award-info":[{"award-number":["2022J01891"]}],"id":[{"id":"10.13039\/501100003392","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Vis Comput"],"published-print":{"date-parts":[[2025,1]]},"DOI":"10.1007\/s00371-024-03422-2","type":"journal-article","created":{"date-parts":[[2024,5,7]],"date-time":"2024-05-07T20:12:54Z","timestamp":1715112774000},"page":"1319-1333","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["AParC-DETR: Accelerate DETR training by introducing Adaptive Position-aware Circular Convolution"],"prefix":"10.1007","volume":"41","author":[{"given":"Ya\u2019nan","family":"Guan","sequence":"first","affiliation":[]},{"given":"Shujiao","family":"Liao","sequence":"additional","affiliation":[]},{"given":"Wenyuan","family":"Yang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,5,7]]},"reference":[{"key":"3422_CR1","doi-asserted-by":"crossref","unstructured":"Ige, A.O. Tosin;Kolade: Enhancing border security and countering terrorism through computer vision: A field of artificial intelligence. Lecture Notes in Networks and Systems, 656\u2013666 (2023)","DOI":"10.1007\/978-3-031-21438-7_54"},{"key":"3422_CR2","doi-asserted-by":"crossref","unstructured":"Wu, Q., Liu, J., Feng, M.: Msdb-based cnn architecture for image dehazing in driverless cars. In: 2023 IEEE 3rd International Conference on Power, Electronics and Computer Applications (ICPECA), pp. 789\u2013794 (2023). IEEE","DOI":"10.1109\/ICPECA56706.2023.10076095"},{"key":"3422_CR3","doi-asserted-by":"publisher","DOI":"10.1016\/j.compbiomed.2021.105123","volume":"141","author":"H Hassan","year":"2022","unstructured":"Hassan, H., Ren, Z., Zhao, H., Huang, S., Li, D., Xiang, S., Kang, Y., Chen, S., Huang, B.: Review and classification of ai-enabled covid-19 ct imaging models based on computer vision tasks. Comput. Biol. Med. 141, 105123 (2022)","journal-title":"Comput. Biol. Med."},{"key":"3422_CR4","doi-asserted-by":"crossref","unstructured":"Kollias, D., Arsenos, A., Kollias, S.: Ai-mia: Covid-19 detection and severity analysis through medical imaging. 
In: Computer Vision\u2013ECCV 2022 Workshops: Tel Aviv, Israel, October 23\u201327, 2022, Proceedings, Part VII, pp. 677\u2013690 (2023). Springer","DOI":"10.1007\/978-3-031-25082-8_46"},{"key":"3422_CR5","doi-asserted-by":"crossref","unstructured":"Chen, H., Chu, X., Ren, Y., Zhao, X., Huang, K.: PeLK: Parameter-efficient Large Kernel ConvNets with Peripheral Convolution (2024)","DOI":"10.1109\/CVPR52733.2024.00531"},{"key":"3422_CR6","unstructured":"Howard, S., Norreys, P., D\u00f6pp, A.: CoordGate: Efficiently Computing Spatially-Varying Convolutions in Convolutional Neural Networks (2024)"},{"key":"3422_CR7","doi-asserted-by":"publisher","unstructured":"Chen, J., Kao, S.-h., He, H., Zhuo, W., Wen, S., Lee, C.-H., Chan, S.-H.G.: Run, don\u2019t walk: Chasing higher flops for faster neural networks. In: 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 12021\u201312031 (2023). https:\/\/doi.org\/10.1109\/CVPR52729.2023.01157","DOI":"10.1109\/CVPR52729.2023.01157"},{"key":"3422_CR8","unstructured":"Zhang, X., Song, Y., Song, T., Yang, D., Ye, Y., Zhou, J., Zhang, L.: AKConv: Convolutional Kernel with Arbitrary Sampled Shapes and Arbitrary Number of Parameters (2023)"},{"key":"3422_CR9","doi-asserted-by":"crossref","unstructured":"Xiong, Y., Li, Z., Chen, Y., Wang, F., Zhu, X., Luo, J., Wang, W., Lu, T., Li, H., Qiao, Y., Lu, L., Zhou, J., Dai, J.: Efficient Deformable ConvNets: Rethinking Dynamic and Sparse Operator for Vision Applications (2024)","DOI":"10.1109\/CVPR52733.2024.00540"},{"key":"3422_CR10","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., et al.: An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"3422_CR11","doi-asserted-by":"crossref","unstructured":"Girshick, R.: Fast r-cnn. Proceedings of the IEEE International Conference on Computer Vision, 1440\u20131448 (2015)","DOI":"10.1109\/ICCV.2015.169"},{"key":"3422_CR12","doi-asserted-by":"crossref","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster r-cnn: Towards real-time object detection with region proposal networks. IEEE Transactions on Pattern Analysis and Machine Intelligence, 1137\u20131149 (2017)","DOI":"10.1109\/TPAMI.2016.2577031"},{"key":"3422_CR13","doi-asserted-by":"crossref","unstructured":"Redmon, J., Divvala, S., Girshick, R., Farhadi, A.: You only look once: Unified, real-time object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 779\u2013788 (2016)","DOI":"10.1109\/CVPR.2016.91"},{"key":"3422_CR14","doi-asserted-by":"crossref","unstructured":"Redmon, J., Farhadi, A.: Yolo9000: better, faster, stronger. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7263\u20137271 (2017)","DOI":"10.1109\/CVPR.2017.690"},{"key":"3422_CR15","unstructured":"Joseph Redmon and Ali Farhadi: Yolov3: An incremental improvement. CoRR arXiv:1804.02767 (2018)"},{"key":"3422_CR16","first-page":"15908","volume":"34","author":"K Han","year":"2021","unstructured":"Han, K., Xiao, A., Wu, E., Guo, J., Xu, C., Wang, Y.: Transformer in transformer. Adv. Neural. Inf. Process. Syst. 34, 15908\u201315919 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"3422_CR17","unstructured":"Beal, J., Kim, E., Tzeng, E., Park, D.H., Zhai, A., Kislyuk, D.: Toward transformer-based object detection. 
arXiv preprint arXiv:2012.09958 (2020)"},{"key":"3422_CR18","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lin, Y., Cao, Y., Hu, H., Wei, Y., Zhang, Z., Lin, S., Guo, B.: Swin transformer: Hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"3422_CR19","doi-asserted-by":"crossref","unstructured":"Liu, Z., Hu, H., Lin, Y., Yao, Z., Xie, Z., Wei, Y., Ning, J., Cao, Y., Zhang, Z., Dong, L., et al.: Swin transformer v2: Scaling up capacity and resolution. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12009\u201312019 (2022)","DOI":"10.1109\/CVPR52688.2022.01170"},{"key":"3422_CR20","doi-asserted-by":"crossref","unstructured":"Wang, W., Xie, E., Li, X., Fan, D.-P., Song, K., Liang, D., Lu, T., Luo, P., Shao, L.: Pyramid vision transformer: A versatile backbone for dense prediction without convolutions. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 568\u2013578 (2021)","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"3422_CR21","unstructured":"Zhou, D., Kang, B., Jin, X., Yang, L., Lian, X., Jiang, Z., Hou, Q., Feng, J.: Deepvit: Towards deeper vision transformer. arXiv preprint arXiv:2103.11886 (2021)"},{"key":"3422_CR22","doi-asserted-by":"crossref","unstructured":"Yao, D., Shao, Y.: A data efficient transformer based on swin transformer. The Visual Computer, 1\u201310 (2023)","DOI":"10.1007\/s00371-023-02939-2"},{"issue":"8","key":"3422_CR23","doi-asserted-by":"publisher","first-page":"3235","DOI":"10.1007\/s00371-023-02966-z","volume":"39","author":"P Zhou","year":"2023","unstructured":"Zhou, P., Dong, X., Cao, J., Chen, Z.: Met: mesh transformer with an edge. Vis. Comput. 39(8), 3235\u20133246 (2023)","journal-title":"Vis. Comput."},{"key":"3422_CR24","doi-asserted-by":"crossref","unstructured":"Zhang, Q., Ge, Y., Zhang, C., Bi, H.: Tprnet: camouflaged object detection via transformer-induced progressive refinement network. The Visual Computer, 1\u201315 (2022)","DOI":"10.1007\/s00371-022-02611-1"},{"issue":"6","key":"3422_CR25","doi-asserted-by":"publisher","first-page":"2277","DOI":"10.1007\/s00371-022-02413-5","volume":"39","author":"X Liang","year":"2023","unstructured":"Liang, X., Xu, L., Zhang, W., Zhang, Y., Liu, J., Liu, Z.: A convolution-transformer dual branch network for head-pose and occlusion facial expression recognition. Vis. Comput. 39(6), 2277\u20132290 (2023)","journal-title":"Vis. Comput."},{"issue":"7","key":"3422_CR26","doi-asserted-by":"publisher","first-page":"2671","DOI":"10.1007\/s00371-022-02485-3","volume":"39","author":"B Li","year":"2023","unstructured":"Li, B., Zhang, Y., Xu, H., Yin, B.: Ccst: crowd counting with swin transformer. Vis. Comput. 39(7), 2671\u20132682 (2023)","journal-title":"Vis. Comput."},{"key":"3422_CR27","doi-asserted-by":"crossref","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part I 16, pp. 213\u2013229 (2020). Springer","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"3422_CR28","unstructured":"Zhu, X., Su, W., Lu, L., Li, B., Wang, X., Dai, J.: Deformable detr: Deformable transformers for end-to-end object detection. 
arXiv preprint arXiv:2010.04159 (2020)"},{"key":"3422_CR29","unstructured":"Zheng, M., Gao, P., Zhang, R., Li, K., Wang, X., Li, H., Dong, H.: End-to-end object detection with adaptive clustering transformer. arXiv preprint arXiv:2011.09315 (2020)"},{"key":"3422_CR30","doi-asserted-by":"crossref","unstructured":"Wang, T., Yuan, L., Chen, Y., Feng, J., Yan, S.: Pnp-detr: Towards efficient visual analysis with transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4661\u20134670 (2021)","DOI":"10.1109\/ICCV48922.2021.00462"},{"key":"3422_CR31","unstructured":"Roh, B., Shin, J., Shin, W., Kim, S.: Sparse detr: Efficient end-to-end object detection with learnable sparsity. arXiv preprint arXiv:2111.14330 (2021)"},{"key":"3422_CR32","doi-asserted-by":"publisher","first-page":"5134","DOI":"10.1109\/TIP.2022.3193288","volume":"31","author":"W Tang","year":"2022","unstructured":"Tang, W., He, F., Liu, Y., Duan, Y.: Matr: Multimodal medical image fusion via multiscale adaptive transformer. IEEE Trans. Image Process. 31, 5134\u20135149 (2022)","journal-title":"IEEE Trans. Image Process."},{"key":"3422_CR33","doi-asserted-by":"crossref","unstructured":"Liu, W.T.H.: Ydtr: Infrared and visible image fusion via y-shape dynamic transformer. IEEE Transactions on Multimedia, 5413\u20135428 (2023)","DOI":"10.1109\/TMM.2022.3192661"},{"key":"3422_CR34","doi-asserted-by":"crossref","unstructured":"Sun, P., Zhang, R., Jiang, Y., Kong, T., Xu, C., Zhan, W., Tomizuka, M., Li, L., Yuan, Z., Wang, C., et al.: Sparse r-cnn: End-to-end object detection with learnable proposals. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14454\u201314463 (2021)","DOI":"10.1109\/CVPR46437.2021.01422"},{"key":"3422_CR35","doi-asserted-by":"crossref","unstructured":"Pan, X., Ge, C., Lu, R., Song, S., Chen, G., Huang, Z., Huang, G.: On the integration of self-attention and convolution. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 815\u2013825 (2022)","DOI":"10.1109\/CVPR52688.2022.00089"},{"key":"3422_CR36","unstructured":"Hou, Q., Lu, C.-Z., Cheng, M.-M., Feng, J.: Conv2former: A simple transformer-style convnet for visual recognition. arXiv preprint arXiv:2211.11943 (2022)"},{"key":"3422_CR37","doi-asserted-by":"crossref","unstructured":"Liu, Z., Mao, H., Wu, C.-Y., Feichtenhofer, C., Darrell, T., Xie, S.: A convnet for the 2020s. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 11976\u201311986 (2022)","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"3422_CR38","doi-asserted-by":"crossref","unstructured":"Liu, J., Huang, X., Song, G., Li, H., Liu, Y.: Uninet: Unified architecture search with convolution, transformer, and mlp. In: Computer Vision\u2013ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings, Part XXI, pp. 33\u201349 (2022). Springer","DOI":"10.1007\/978-3-031-19803-8_3"},{"key":"3422_CR39","doi-asserted-by":"crossref","unstructured":"Zhang, H., Hu, W., Wang, X.: Parc-net: Position aware circular convolution with merits from convnets and transformer. In: Computer Vision\u2013ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings, Part XXVI, pp. 613\u2013630 (2022). Springer","DOI":"10.1007\/978-3-031-19809-0_35"},{"key":"3422_CR40","unstructured":"Yang, T., Zhang, H., Hu, W., Chen, C., Wang, X.: Fast-parc: Position aware global kernel for convnets and vits. 
arXiv preprint arXiv:2210.04020 (2022)"},{"key":"3422_CR41","doi-asserted-by":"crossref","unstructured":"Gao, Z., Wang, L., Han, B., Guo, S.: Adamixer: A fast-converging query-based object detector. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5364\u20135373 (2022)","DOI":"10.1109\/CVPR52688.2022.00529"},{"key":"3422_CR42","first-page":"24261","volume":"34","author":"IO Tolstikhin","year":"2021","unstructured":"Tolstikhin, I.O., Houlsby, N., Kolesnikov, A., Beyer, L., Zhai, X., Unterthiner, T., Yung, J., Steiner, A., Keysers, D., Uszkoreit, J., et al.: Mlp-mixer: An all-mlp architecture for vision. Adv. Neural. Inf. Process. Syst. 34, 24261\u201324272 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"3422_CR43","doi-asserted-by":"crossref","unstructured":"Zhang, G., Luo, Z., Yu, Y., Cui, K., Lu, S.: Accelerating detr convergence via semantic-aligned matching. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 949\u2013958 (2022)","DOI":"10.1109\/CVPR52688.2022.00102"},{"key":"3422_CR44","doi-asserted-by":"crossref","unstructured":"Li, F., Zhang, H., Liu, S., Guo, J., Ni, L.M., Zhang, L.: Dn-detr: Accelerate detr training by introducing query denoising. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13619\u201313627 (2022)","DOI":"10.1109\/CVPR52688.2022.01325"},{"key":"3422_CR45","unstructured":"Savvides, F.C.Z.H.-K.H.Z.: Enhanced training of query-based object detection via selective query recollection. In: 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2023)"},{"key":"3422_CR46","doi-asserted-by":"crossref","unstructured":"Gao, P., Zheng, M., Wang, X., Dai, J., Li, H.: Fast convergence of detr with spatially modulated co-attention. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3621\u20133630 (2021)","DOI":"10.1109\/ICCV48922.2021.00360"},{"key":"3422_CR47","doi-asserted-by":"crossref","unstructured":"Meng, D., Chen, X., Fan, Z., Zeng, G., Li, H., Yuan, Y., Sun, L., Wang, J.: Conditional detr for fast training convergence. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3651\u20133660 (2021)","DOI":"10.1109\/ICCV48922.2021.00363"},{"key":"3422_CR48","unstructured":"Chen, X., Wei, F., Zeng, G., Wang, J.: Conditional detr v2: Efficient detection transformer with box queries. arXiv preprint arXiv:2207.08914 (2022)"},{"key":"3422_CR49","doi-asserted-by":"crossref","unstructured":"Wang, Y., Zhang, X., Yang, T., Sun, J.: Anchor detr: Query design for transformer-based detector. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 36, pp. 2567\u20132575 (2022)","DOI":"10.1609\/aaai.v36i3.20158"},{"key":"3422_CR50","unstructured":"Yao, Z., Ai, J., Li, B., Zhang, C.: Efficient detr: improving end-to-end object detector with dense prior. arXiv preprint arXiv:2104.01318 (2021)"},{"key":"3422_CR51","doi-asserted-by":"crossref","unstructured":"Dai, X., Chen, Y., Yang, J., Zhang, P., Yuan, L., Zhang, L.: Dynamic detr: End-to-end object detection with dynamic attention. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2988\u20132997 (2021)","DOI":"10.1109\/ICCV48922.2021.00298"},{"key":"3422_CR52","unstructured":"Melas-Kyriazi, L.: Do you even need attention? a stack of feed-forward layers does surprisingly well on imagenet. 
arXiv preprint arXiv:2105.02723 (2021)"},{"key":"3422_CR53","unstructured":"Ding, X., Xia, C., Zhang, X., Chu, X., Han, J., Ding, G.: Repmlp: Re-parameterizing convolutions into fully-connected layers for image recognition. arXiv preprint arXiv:2105.01883 (2021)"},{"key":"3422_CR54","first-page":"9204","volume":"34","author":"H Liu","year":"2021","unstructured":"Liu, H., Dai, Z., So, D., Le, Q.V.: Pay attention to mlps. Adv. Neural. Inf. Process. Syst. 34, 9204\u20139215 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"3422_CR55","doi-asserted-by":"crossref","unstructured":"Guo, J., Tang, Y., Han, K., Chen, X., Wu, H., Xu, C., Xu, C., Wang, Y.: Hire-mlp: Vision mlp via hierarchical rearrangement. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 826\u2013836 (2022)","DOI":"10.1109\/CVPR52688.2022.00090"},{"key":"3422_CR56","doi-asserted-by":"crossref","unstructured":"Tu, Z., Talebi, H., Zhang, H., Yang, F., Milanfar, P., Bovik, A., Li, Y.: Maxim: Multi-axis mlp for image processing. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5769\u20135780 (2022)","DOI":"10.1109\/CVPR52688.2022.00568"},{"key":"3422_CR57","unstructured":"Luo, S.C.X.G.C.L.: Cyclemlp: A mlp-like architecture for dense visual predictions. IEEE Transactions on Pattern Analysis and Machine Intelligence, 1\u201317 (2023)"},{"key":"3422_CR58","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., Zitnick, C.L.: Microsoft coco: Common objects in context. In: Computer Vision\u2013ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part V 13, pp. 740\u2013755 (2014). Springer","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"3422_CR59","doi-asserted-by":"crossref","unstructured":"Cordts, M., Omran, M., Ramos, S., Rehfeld, T., Enzweiler, M., Benenson, R., Franke, U., Roth, S., Schiele, B.: The cityscapes dataset for semantic urban scene understanding. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3213\u20133223 (2016)","DOI":"10.1109\/CVPR.2016.350"},{"key":"3422_CR60","unstructured":"Zhang, G., Luo, Z., Yu, Y., Huang, J., Cui, K., Lu, S., Xing, E.P.: Semantic-aligned matching for enhanced detr convergence and multi-scale feature fusion. arXiv preprint arXiv:2207.14172 (2022)"},{"key":"3422_CR61","doi-asserted-by":"crossref","unstructured":"Qiu, H., Ma, Y., Li, Z., Liu, S., Sun, J.: Borderdet: Border feature for dense object detection. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part I 16, pp. 549\u2013564 (2020). Springer","DOI":"10.1007\/978-3-030-58452-8_32"},{"key":"3422_CR62","doi-asserted-by":"crossref","unstructured":"Zhang, H., Chang, H., Ma, B., Wang, N., Chen, X.: Dynamic r-cnn: Towards high quality object detection via dynamic training. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XV 16, pp. 260\u2013275 (2020). Springer","DOI":"10.1007\/978-3-030-58555-6_16"},{"key":"3422_CR63","doi-asserted-by":"crossref","unstructured":"Cai, Z., Vasconcelos, N.: Cascade r-cnn: Delving into high quality object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 
6154\u20136162 (2018)","DOI":"10.1109\/CVPR.2018.00644"},{"key":"3422_CR64","unstructured":"Zhu, B., Wang, J., Jiang, Z., Zong, F., Liu, S., Li, Z., Sun, J.: Autoassign: Differentiable label assignment for dense object detection. arXiv preprint arXiv:2007.03496 (2020)"},{"key":"3422_CR65","unstructured":"Liu, S., Li, F., Zhang, H., Yang, X., Qi, X., Su, H., Zhu, J., Zhang, L.: Dab-detr: Dynamic anchor boxes are better queries for detr. arXiv preprint arXiv:2201.12329 (2022)"},{"key":"3422_CR66","doi-asserted-by":"crossref","unstructured":"Cheng, B., Misra, I., Schwing, A.G., Kirillov, A., Girdhar, R.: Masked-attention mask transformer for universal image segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1290\u20131299 (2022)","DOI":"10.1109\/CVPR52688.2022.00135"},{"key":"3422_CR67","unstructured":"Shum, F.L.Z.X.L.Z.M.N.-Y.: Mask dino: Towards a unified transformer-based framework for object detection and segmentation. In: 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2023)"},{"key":"3422_CR68","doi-asserted-by":"crossref","unstructured":"Sun, Z., Cao, S., Yang, Y., Kitani, K.M.: Rethinking transformer-based set prediction for object detection. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3611\u20133620 (2021)","DOI":"10.1109\/ICCV48922.2021.00359"},{"key":"3422_CR69","unstructured":"Jocher, G., Chaurasia, A., Qiu, J.: YOLO by Ultralytics. https:\/\/github.com\/ultralytics\/ultralytics"},{"key":"3422_CR70","unstructured":"Zhang, H., Li, F., Liu, S., Zhang, L., Su, H., Zhu, J., Ni, L.M., Shum, H.-Y.: Dino: Detr with improved denoising anchor boxes for end-to-end object detection. arXiv preprint arXiv:2203.03605 (2022)"}],"container-title":["The Visual Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-024-03422-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00371-024-03422-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-024-03422-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,3]],"date-time":"2025-02-03T12:37:20Z","timestamp":1738586240000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00371-024-03422-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,7]]},"references-count":70,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2025,1]]}},"alternative-id":["3422"],"URL":"https:\/\/doi.org\/10.1007\/s00371-024-03422-2","relation":{},"ISSN":["0178-2789","1432-2315"],"issn-type":[{"type":"print","value":"0178-2789"},{"type":"electronic","value":"1432-2315"}],"subject":[],"published":{"date-parts":[[2024,5,7]]},"assertion":[{"value":"13 April 2024","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 May 2024","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors state that they have no known financial or personal relationships that could have influenced the work reported in this 
paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}