{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T18:15:12Z","timestamp":1772907312020,"version":"3.50.1"},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"7-8","license":[{"start":{"date-parts":[[2025,1,18]],"date-time":"2025-01-18T00:00:00Z","timestamp":1737158400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,18]],"date-time":"2025-01-18T00:00:00Z","timestamp":1737158400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["Nos. 62276073, 61966004"],"award-info":[{"award-number":["Nos. 62276073, 61966004"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["Nos. 62276073, 61966004"],"award-info":[{"award-number":["Nos. 62276073, 61966004"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["Nos. 62276073, 61966004"],"award-info":[{"award-number":["Nos. 62276073, 61966004"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Guangxi Natural Science Foundation","award":["No. 2019GXNSFDA245018"],"award-info":[{"award-number":["No. 2019GXNSFDA245018"]}]},{"name":"Guangxi Natural Science Foundation","award":["No. 2019GXNSFDA245018"],"award-info":[{"award-number":["No. 2019GXNSFDA245018"]}]},{"name":"Guangxi Natural Science Foundation","award":["No. 2019GXNSFDA245018"],"award-info":[{"award-number":["No. 2019GXNSFDA245018"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int. J. Mach. Learn. & Cyber."],"published-print":{"date-parts":[[2025,8]]},"DOI":"10.1007\/s13042-025-02541-z","type":"journal-article","created":{"date-parts":[[2025,1,18]],"date-time":"2025-01-18T05:25:00Z","timestamp":1737177900000},"page":"4767-4782","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["PF-DETR: Instance position and local feature enhancement for DETR"],"prefix":"10.1007","volume":"16","author":[{"given":"Xinfang","family":"Zhong","sequence":"first","affiliation":[]},{"given":"Wenlan","family":"Kuang","sequence":"additional","affiliation":[]},{"given":"Zhixin","family":"Li","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,1,18]]},"reference":[{"key":"2541_CR1","doi-asserted-by":"publisher","first-page":"261","DOI":"10.1007\/s11263-019-01247-4","volume":"128","author":"L Liu","year":"2020","unstructured":"Liu L, Ouyang W, Wang X, Fieguth P, Chen J, Liu X et al (2020) Deep learning for generic object detection: a survey. Int J Comput Vis 128:261\u2013318","journal-title":"Int J Comput Vis"},{"key":"2541_CR2","doi-asserted-by":"crossref","unstructured":"Cai Z, Vasconcelos N (2018) Cascade R-CNN: Delving into high quality object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6154\u20136162","DOI":"10.1109\/CVPR.2018.00644"},{"key":"2541_CR3","doi-asserted-by":"crossref","unstructured":"Carion N, Massa F, Synnaeve G, et\u00a0al. (2020) End-to-end object detection with Transformers. In: Proceedings of European Conference on Computer Vision, pp. 213\u2013229","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"2541_CR4","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, et\u00a0al. (2017) Attention is all you need. In: Advances in Neural Information Processing Systems, pp. 5998\u20136008"},{"issue":"19","key":"2541_CR5","doi-asserted-by":"publisher","first-page":"22488","DOI":"10.1007\/s10489-023-04799-8","volume":"53","author":"B Xiao","year":"2023","unstructured":"Xiao B, Nguyen M, Yan WQ (2023) Fruit ripeness identification using transformers. Appl Intell 53(19):22488\u201322499","journal-title":"Appl Intell"},{"issue":"6","key":"2541_CR6","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","volume":"39","author":"S Ren","year":"2017","unstructured":"Ren S, He K, Girshick R, Sun J (2017) Faster R-CNN: Towards real-time object detection with region proposal networks. IEEE Trans Pattern Anal Mach Intell 39(6):1137\u20131149","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"2541_CR7","unstructured":"Liu S, Li F, Zhang H, et\u00a0al. (2022) Dab-detr: Dynamic anchor boxes are better queries for detr. In: Proceedings of the International Conference on Learning Representations, pp. 1\u20138"},{"key":"2541_CR8","doi-asserted-by":"crossref","unstructured":"He K, Gkioxari G, Doll\u00e1r P, Girshick R (2017) Mask r-cnn. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2961\u20132969","DOI":"10.1109\/ICCV.2017.322"},{"key":"2541_CR9","doi-asserted-by":"crossref","unstructured":"Tian Z, Shen C, Chen H, He T (2019) FCOS: fully convolutional one-stage object detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 9626\u20139635","DOI":"10.1109\/ICCV.2019.00972"},{"issue":"10","key":"2541_CR10","doi-asserted-by":"publisher","first-page":"4589","DOI":"10.1007\/s13042-024-02175-7","volume":"15","author":"Y Wen","year":"2024","unstructured":"Wen Y, Wang L (2024) Yolo-sd: simulated feature fusion for few-shot industrial defect detection based on yolov8 and stable diffusion. Int J Mach Learn Cybern 15(10):4589\u20134601","journal-title":"Int J Mach Learn Cybern"},{"key":"2541_CR11","doi-asserted-by":"crossref","unstructured":"He K, Gkioxari G, Doll\u00e1r P, Girshick R (2017) Mask R-CNN. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2980\u20132988","DOI":"10.1109\/ICCV.2017.322"},{"key":"2541_CR12","unstructured":"Xu S, Wang X, Lv W, Chang Q, Cui C, Deng K, Wang G, Dang Q, Wei S, Du Y, et al (2022) Pp-yoloe: an evolved version of yolo. arXiv preprint arXiv:2203.16250"},{"key":"2541_CR13","unstructured":"Wang C-Y, Yeh I-H, Liao H-YM (2021) You only learn one representation: Unified network for multiple tasks. arXiv preprint arXiv:2105.04206"},{"key":"2541_CR14","unstructured":"Zhou X, Koltun V, Kr\u00e4henb\u00fchl P (2021) Probabilistic two-stage detection. arXiv preprint arXiv:2103.07461"},{"issue":"22","key":"2541_CR15","doi-asserted-by":"publisher","first-page":"26781","DOI":"10.1007\/s10489-023-04927-4","volume":"53","author":"E Haugsdal","year":"2023","unstructured":"Haugsdal E, Aune E, Ruocco M (2023) Persistence initialization: a novel adaptation of the transformer architecture for time series forecasting. Appl Intell 53(22):26781\u201326796","journal-title":"Appl Intell"},{"key":"2541_CR16","doi-asserted-by":"crossref","unstructured":"Liu Z, Lin Y, Cao Y, et\u00a0al. (2021) Swin Transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 9992\u201310002","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"2541_CR17","doi-asserted-by":"publisher","first-page":"415","DOI":"10.1007\/s41095-022-0274-8","volume":"8","author":"W Wang","year":"2022","unstructured":"Wang W, Xie E, Li X, Fan D-P, Song K, Liang D et al (2022) PVT v2: improved baselines with pyramid vision transformer. Comput Vis Med 8:415\u2013424","journal-title":"Comput Vis Med"},{"key":"2541_CR18","doi-asserted-by":"crossref","unstructured":"Yao T, Pan Y, Li Y, Ngo C-W, Mei T (2022) Wave-ViT: Unifying wavelet and Transformers for visual representation learning. In: Proceedings of European Conference on Computer Vision, pp. 328\u2013345","DOI":"10.1007\/978-3-031-19806-9_19"},{"issue":"1","key":"2541_CR19","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2022.103154","volume":"60","author":"X Xie","year":"2023","unstructured":"Xie X, Li Z, Tang Z, Yao D, Ma H (2023) Unifying knowledge iterative dissemination and relational reconstruction network for image-text matching. Inform Process Manage 60(1):103154","journal-title":"Inform Process Manage"},{"key":"2541_CR20","unstructured":"Zhu X, Su W, Lu L, et\u00a0al. (2021) Deformable DETR: deformable transformers for end-to-end object detection. In: Proceedings of the International Conference on Learning Representations, pp. 1\u20138"},{"key":"2541_CR21","doi-asserted-by":"crossref","unstructured":"Sun Z, Cao S, Yang Y, Kitani KM (2021) Rethinking transformer-based set prediction for object detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 3611\u20133620","DOI":"10.1109\/ICCV48922.2021.00359"},{"key":"2541_CR22","doi-asserted-by":"crossref","unstructured":"Lin T-Y, Doll\u00e1r P, Girshick R, et\u00a0al. (2017) Feature pyramid networks for object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 936\u2013944","DOI":"10.1109\/CVPR.2017.106"},{"key":"2541_CR23","doi-asserted-by":"crossref","unstructured":"Meng D, Chen X, Fan Z, et\u00a0al. (2021) Conditional DETR for fast training convergence. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 3631\u20133640","DOI":"10.1109\/ICCV48922.2021.00363"},{"key":"2541_CR24","unstructured":"Yao Z, Ai J, Li B, Zhang C (2021) Efficient detr: improving end-to-end object detector with dense prior. arXiv preprint arXiv:2104.01318"},{"key":"2541_CR25","doi-asserted-by":"crossref","unstructured":"Li F, Zhang H, Liu S, et\u00a0al. (2022) DN-DETR: accelerate detr training by introducing query denoising. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 13609\u201313617","DOI":"10.1109\/CVPR52688.2022.01325"},{"key":"2541_CR26","doi-asserted-by":"crossref","unstructured":"Liu Y, Wang R, Shan S, Chen X (2018) Structure inference net: Object detection using scene-level context and instance-level relationships. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6985\u20136994","DOI":"10.1109\/CVPR.2018.00730"},{"key":"2541_CR27","unstructured":"Herdade S, Kappeler A, Boakye K, Soares J (2019) Image captioning: transforming objects into words. In: Advances in Neural Information Processing Systems, pp. 11137\u201311147"},{"key":"2541_CR28","doi-asserted-by":"crossref","unstructured":"Zhu J, Li Z, Zeng Y, Wei J, Ma H (2022) Image-text matching with fine-grained relational dependency and bidirectional attention-based generative networks. In: Proceedings of the ACM International Conference on Multimedia, pp. 395\u2013403","DOI":"10.1145\/3503161.3548058"},{"issue":"2","key":"2541_CR29","doi-asserted-by":"publisher","first-page":"1489","DOI":"10.1109\/TPAMI.2022.3164083","volume":"45","author":"Y Li","year":"2023","unstructured":"Li Y, Yao T, Pan Y et al (2023) Contextual Transformer networks for visual recognition. IEEE Trans Pattern Anal Mach Intell 45(2):1489\u20131500","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"2541_CR30","doi-asserted-by":"publisher","first-page":"7647","DOI":"10.1109\/TMM.2022.3224663","volume":"25","author":"Y Feng","year":"2022","unstructured":"Feng Y, Yu J, Chen F et al (2022) Visible-infrared person re-identification via cross-modality interaction transformer. IEEE Trans Multimed 25:7647\u20137659","journal-title":"IEEE Trans Multimed"},{"key":"2541_CR31","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2024.110981","volume":"158","author":"Y Feng","year":"2025","unstructured":"Feng Y, Chen F, Yu J et al (2025) Homogeneous and heterogeneous relational graph for visible-infrared person re-identification. Pattern Recogn 158:110981","journal-title":"Pattern Recogn"},{"key":"2541_CR32","doi-asserted-by":"crossref","unstructured":"Zhu H, Ke W, Li D, et\u00a0al. (2022) Dual cross-attention learning for fine-grained visual categorization and object re-identification. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4692\u20134702","DOI":"10.1109\/CVPR52688.2022.00465"},{"issue":"2","key":"2541_CR33","doi-asserted-by":"publisher","first-page":"318","DOI":"10.1109\/TPAMI.2018.2858826","volume":"42","author":"T-Y Lin","year":"2020","unstructured":"Lin T-Y, Goyal P, Girshick R, He K, Doll\u00e1r P (2020) Focal loss for dense object detection. IEEE Trans Pattern Anal Mach Intell 42(2):318\u2013327","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"2541_CR34","doi-asserted-by":"crossref","unstructured":"Rezatofighi H, Tsoi N, Gwak J, Sadeghian A, Reid I, Savarese S (2019) Generalized intersection over union: A metric and a loss for bounding box regression. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 658\u2013666","DOI":"10.1109\/CVPR.2019.00075"},{"key":"2541_CR35","doi-asserted-by":"crossref","unstructured":"Lin T-Y, Maire M, Belongie S, Hays J, Perona P, Ramanan D (2014) Microsoft COCO: common objects in context. In: Proceedings of European Conference on Computer Vision, pp. 740\u2013755","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"2541_CR36","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"2541_CR37","doi-asserted-by":"crossref","unstructured":"Deng J, Dong W, Socher R, et\u00a0al. (2009) ImageNet: A large-scale hierarchical image database. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 248\u2013255","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"2541_CR38","doi-asserted-by":"crossref","unstructured":"Sun P, Zhang R, Jiang Y, et\u00a0al. (2021) Sparse r-cnn: End-to-end object detection with learnable proposals. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 14454\u201314463","DOI":"10.1109\/CVPR46437.2021.01422"},{"key":"2541_CR39","doi-asserted-by":"crossref","unstructured":"Wang Y, Zhang X, Yang T, Sun J (2022) Anchor detr: Query design for transformer-based detector. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 36, pp. 2567\u20132575","DOI":"10.1609\/aaai.v36i3.20158"},{"key":"2541_CR40","doi-asserted-by":"crossref","unstructured":"Gao P, Zheng M, Wang X, Dai J, Li H (2021) Fast convergence of detr with spatially modulated co-attention. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 3621\u20133630","DOI":"10.1109\/ICCV48922.2021.00360"},{"key":"2541_CR41","unstructured":"Roh B, Shin J, Shin W, Kim S (2022) Sparse detr: efficient end-to-end object detection with learnable sparsity. In: Proceedings of the International Conference on Learning Representations, pp. 1\u20138"},{"issue":"7","key":"2541_CR42","first-page":"8284","volume":"45","author":"C Li","year":"2023","unstructured":"Li C, Zhou H, Liu Y, Yang C et al (2023) Detection-friendly dehazing: object detection in real-world hazy scenes. IEEE Trans Pattern Anal Mach Intell 45(7):8284\u20138295","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"2541_CR43","doi-asserted-by":"publisher","first-page":"2593","DOI":"10.1109\/TIP.2023.3270801","volume":"32","author":"H Zhou","year":"2023","unstructured":"Zhou H, Tian C, Zhang Z, Li C et al (2023) Position-aware relation learning for rgb-thermal salient object detection. IEEE Trans Image Process 32:2593\u20132607","journal-title":"IEEE Trans Image Process"}],"container-title":["International Journal of Machine Learning and Cybernetics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-025-02541-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s13042-025-02541-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-025-02541-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,6]],"date-time":"2025-09-06T03:18:17Z","timestamp":1757128697000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s13042-025-02541-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,1,18]]},"references-count":43,"journal-issue":{"issue":"7-8","published-print":{"date-parts":[[2025,8]]}},"alternative-id":["2541"],"URL":"https:\/\/doi.org\/10.1007\/s13042-025-02541-z","relation":{},"ISSN":["1868-8071","1868-808X"],"issn-type":[{"value":"1868-8071","type":"print"},{"value":"1868-808X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,1,18]]},"assertion":[{"value":"10 July 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 January 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 January 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare there is no Conflict of interest regarding the publication of this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not Applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval"}},{"value":"Not Applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to participate"}},{"value":"Not Applicable.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}}]}}