{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T10:59:16Z","timestamp":1774522756880,"version":"3.50.1"},"reference-count":44,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2026,2,16]],"date-time":"2026-02-16T00:00:00Z","timestamp":1771200000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,2,16]],"date-time":"2026-02-16T00:00:00Z","timestamp":1771200000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Natural Science Foundation of Fujian Province China","award":["2024J01097"],"award-info":[{"award-number":["2024J01097"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int. J. Mach. Learn. &amp; Cyber."],"published-print":{"date-parts":[[2026,3]]},"DOI":"10.1007\/s13042-025-02949-7","type":"journal-article","created":{"date-parts":[[2026,2,16]],"date-time":"2026-02-16T10:18:14Z","timestamp":1771237094000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["End-to-end multi-person pose estimation method with multi-scale feature reconstruction and enhancement"],"prefix":"10.1007","volume":"17","author":[{"given":"Zhongwei","family":"Lin","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yanmin","family":"Luo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiafeng","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiancong","family":"Liang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tiandi","family":"Peng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ye","family":"Lin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,2,16]]},"reference":[{"key":"2949_CR1","doi-asserted-by":"crossref","unstructured":"Xiao B, Wu H, Wei Y (2018) Simple baselines for human pose estimation and tracking. In: Proceedings of the European conference on computer vision (ECCV), pp 466\u2013481","DOI":"10.1007\/978-3-030-01231-1_29"},{"key":"2949_CR2","doi-asserted-by":"publisher","unstructured":"Yang S, Quan Z, Nie M, Yang W (2021) TransPose: keypoint localization via transformer. In: 2021 IEEE\/CVF international conference on computer vision (ICCV). IEEE, Montreal, QC, Canada, pp 11782\u201311792. https:\/\/doi.org\/10.1109\/ICCV48922.2021.01159","DOI":"10.1109\/ICCV48922.2021.01159"},{"key":"2949_CR3","doi-asserted-by":"crossref","unstructured":"Li Y, Zhang S, Wang Z, Yang S, Yang W, Xia S-T, Zhou E (2021) TokenPose: learning keypoint tokens for human pose estimation. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 11313\u201311322","DOI":"10.1109\/ICCV48922.2021.01112"},{"key":"2949_CR4","doi-asserted-by":"crossref","unstructured":"Cheng B, Xiao B, Wang J, Shi H, Huang TS, Zhang L (2020) HigherHRNet: scale-aware representation learning for bottom-up human pose estimation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 5386\u20135395","DOI":"10.1109\/CVPR42600.2020.00543"},{"key":"2949_CR5","doi-asserted-by":"crossref","unstructured":"Luo Z, Wang Z, Huang Y, Wang L, Tan T, Zhou E (2021) Rethinking the heatmap regression for bottom-up human pose estimation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 13264\u201313273","DOI":"10.1109\/CVPR46437.2021.01306"},{"issue":"5","key":"2949_CR6","doi-asserted-by":"publisher","first-page":"927","DOI":"10.1007\/s00530-021-00755-z","volume":"27","author":"L Yue","year":"2021","unstructured":"Yue L, Li J, Liu Q (2021) Body parts relevance learning via expectation-maximization for human pose estimation. Multimedia Syst 27(5):927\u2013939. https:\/\/doi.org\/10.1007\/s00530-021-00755-z","journal-title":"Multimedia Syst"},{"key":"2949_CR7","unstructured":"Zhou X, Wang D, Kr\u00e4henb\u00fchl P (2019) Objects as points. arXiv"},{"key":"2949_CR8","doi-asserted-by":"publisher","unstructured":"Wei F, Sun X, Li H, Wang J, Lin S (2020) Point-set anchors for object detection, instance segmentation and pose estimation. In: Vedaldi A, Bischof H, Brox T, Frahm J-M (eds) Computer vision\u2014ECCV 2020, vol 12355. Springer, Cham, pp 527\u2013544. https:\/\/doi.org\/10.1007\/978-3-030-58607-2_31","DOI":"10.1007\/978-3-030-58607-2_31"},{"key":"2949_CR9","doi-asserted-by":"crossref","unstructured":"Mao W, Tian Z, Wang X, Shen C (2021) FCPose: fully convolutional multi-person pose estimation with dynamic instance-aware convolutions. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 9034\u20139043","DOI":"10.1109\/CVPR46437.2021.00892"},{"key":"2949_CR10","doi-asserted-by":"publisher","DOI":"10.1007\/s13042-024-02262-9","author":"Y Xie","year":"2024","unstructured":"Xie Y, Hong C, Zhuang W, Liu L, Li J (2024) HOGFormer: high-order graph convolution transformer for 3D human pose estimation. Int J Mach Learn Cybern. https:\/\/doi.org\/10.1007\/s13042-024-02262-9","journal-title":"Int J Mach Learn Cybern"},{"issue":"11","key":"2949_CR11","doi-asserted-by":"publisher","first-page":"5453","DOI":"10.1007\/s13042-024-02254-9","volume":"15","author":"H Ren","year":"2024","unstructured":"Ren H, Zhang X, Shi Y, Liang K (2024) Enhanced spatial-temporal dynamics in pose forecasting through multi-graph convolution networks. Int J Mach Learn Cybern 15(11):5453\u20135467. https:\/\/doi.org\/10.1007\/s13042-024-02254-9","journal-title":"Int J Mach Learn Cybern"},{"key":"2949_CR12","doi-asserted-by":"publisher","unstructured":"Carion N, Massa F, Synnaeve G, Usunier N, Kirillov A, Zagoruyko S (2020) End-to-end object detection with transformers. In: Vedaldi A, Bischof H, Brox T, Frahm J-M (eds) Computer vision\u2014ECCV 2020. Springer, Cham, pp 213\u2013229. https:\/\/doi.org\/10.1007\/978-3-030-58452-8_13","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"2949_CR13","unstructured":"Yang J, Zeng A, Liu S, Li F, Zhang R, Zhang L (2023) Explicit box detection unifies end-to-end multi-person pose estimation. In: International conference on learning representations"},{"key":"2949_CR14","doi-asserted-by":"crossref","unstructured":"Liu H, Chen Q, Tan Z, Liu J-J, Wang J, Su X, Li X, Yao K, Han J, Ding E, Zhao Y, Wang J (2023) Group pose: a simple baseline for end-to-end multi-person pose estimation. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 15029\u201315038","DOI":"10.1109\/ICCV51070.2023.01380"},{"key":"2949_CR15","doi-asserted-by":"publisher","unstructured":"Huang Y-X, Liu H-I, Shuai H-H, Cheng W-H (2024) DQ-DETR: DETR with dynamic query for tiny object detection. arXiv. https:\/\/doi.org\/10.48550\/arXiv.2404.03507","DOI":"10.48550\/arXiv.2404.03507"},{"key":"2949_CR16","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2025.126645","volume":"271","author":"J Guo","year":"2025","unstructured":"Guo J, Du H, Hao X, Zhang M (2025) CFET: a cross-fusion enhanced transformer for visible-infrared person re-identification. Expert Syst Appl 271:126645. https:\/\/doi.org\/10.1016\/j.eswa.2025.126645","journal-title":"Expert Syst Appl"},{"issue":"2","key":"2949_CR17","doi-asserted-by":"publisher","DOI":"10.1088\/1402-4896\/ad9d8b","volume":"100","author":"G Long","year":"2025","unstructured":"Long G, Verma V, Jiang D, Yang Y, Ahmad M (2025) A LE-controlled 4D non-degenerate hyperchaotic system and STP-CS model based efficient image encryption algorithm. Phys Scr 100(2):025228. https:\/\/doi.org\/10.1088\/1402-4896\/ad9d8b","journal-title":"Phys Scr"},{"key":"2949_CR18","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2024.112075","volume":"299","author":"S Gu","year":"2024","unstructured":"Gu S, Zhang M, Xiao Q, Shi W (2024) Cascaded matching based on detection box area for multi-object tracking. Knowl Based Syst 299:112075. https:\/\/doi.org\/10.1016\/j.knosys.2024.112075","journal-title":"Knowl Based Syst"},{"key":"2949_CR19","unstructured":"Zhu X, Su W, Lu L, Li B, Wang X, Dai J (2021) Deformable DETR: deformable transformers for end-to-end object detection. arXiv"},{"key":"2949_CR20","doi-asserted-by":"publisher","unstructured":"Wang R, Shivanna R, Cheng D, Jain S, Lin D, Hong L, Chi E (2021) DCN V2: improved deep & cross network and practical lessons for web-scale learning to rank systems. In: Proceedings of the web conference 2021. ACM, Ljubljana Slovenia, pp 1785\u20131797. https:\/\/doi.org\/10.1145\/3442381.3450078","DOI":"10.1145\/3442381.3450078"},{"key":"2949_CR21","doi-asserted-by":"crossref","unstructured":"Wang W, Dai J, Chen Z, Huang Z, Li Z, Zhu X, Hu X, Lu T, Lu L, Li H, Wang X, Qia, Y (2023) InternImage: exploring large-scale vision foundation models with deformable convolutions. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 14408\u201314419","DOI":"10.1109\/CVPR52729.2023.01385"},{"key":"2949_CR22","doi-asserted-by":"publisher","unstructured":"Yao Z, Ai J, Li B, Zhang C (2021) Efficient DETR: improving end-to-end object detector with dense prior. arXiv. https:\/\/doi.org\/10.48550\/arXiv.2104.01318","DOI":"10.48550\/arXiv.2104.01318"},{"key":"2949_CR23","unstructured":"Zhang H, Li F, Liu S, Zhang L, Su H, Zhu J, Ni L, Shum H-Y (2022) DINO: DETR with improved denoising anchor boxes for end-to-end object detection. In: The eleventh international conference on learning representations"},{"key":"2949_CR24","doi-asserted-by":"publisher","DOI":"10.1007\/s13042-024-02352-8","author":"S Peng","year":"2024","unstructured":"Peng S, Xiong C, Liu L, Yang LT, Chen J (2024) GRPIC: an end-to-end image captioning model using three visual features. Int J Mach Learn Cybern. https:\/\/doi.org\/10.1007\/s13042-024-02352-8","journal-title":"Int J Mach Learn Cybern"},{"issue":"8","key":"2949_CR25","doi-asserted-by":"publisher","first-page":"7259","DOI":"10.1109\/TCSVT.2024.3376690","volume":"34","author":"B Sun","year":"2024","unstructured":"Sun B, Wang Z, Wang S, Cheng Y, Ning J (2024) Bidirectional interaction of CNN and transformer feature for visual tracking. IEEE Trans Circuits Syst Video Technol 34(8):7259\u20137271. https:\/\/doi.org\/10.1109\/TCSVT.2024.3376690","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"2949_CR26","doi-asserted-by":"publisher","first-page":"5073","DOI":"10.1109\/TIP.2024.3453028","volume":"33","author":"S Wang","year":"2024","unstructured":"Wang S, Wang Z, Sun Q, Cheng G, Ning J (2024) Modeling of multiple spatial-temporal relations for robust visual object tracking. IEEE Trans Image Process 33:5073\u20135085. https:\/\/doi.org\/10.1109\/TIP.2024.3453028","journal-title":"IEEE Trans Image Process"},{"issue":"1","key":"2949_CR27","doi-asserted-by":"publisher","first-page":"177","DOI":"10.1049\/cvi2.12235","volume":"18","author":"S Wang","year":"2024","unstructured":"Wang S, Han Y, Sun B, Ning J (2024) IoUNet++: spatial cross-layer interaction-based bounding box regression for visual tracking. IET Comput Vis 18(1):177\u2013189. https:\/\/doi.org\/10.1049\/cvi2.12235","journal-title":"IET Comput Vis"},{"key":"2949_CR28","doi-asserted-by":"publisher","unstructured":"Huang J, Cheng Y, Yu M, Han G, Li J, Zhang J, Wang S, Gu X (2025) Inter - diffusion generation model of speakers and listeners for effective communication. In: Proceedings of the 2025 international conference on multimedia retrieval. ICMR \u201925. Association for Computing Machinery, New York, NY, USA, pp 497\u2013505. https:\/\/doi.org\/10.1145\/3731715.3733366","DOI":"10.1145\/3731715.3733366"},{"key":"2949_CR29","doi-asserted-by":"crossref","unstructured":"Shi D, Wei X, Li L, Ren Y, Tan W (2022) End-to-end multi-person pose estimation with transformers. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 11069\u201311078","DOI":"10.1109\/CVPR52688.2022.01079"},{"key":"2949_CR30","doi-asserted-by":"crossref","unstructured":"Woo S, Park J, Lee J-Y, Kweon IS (2018) CBAM: convolutional block attention module. In: Proceedings of the European conference on computer vision (ECCV), pp 3\u201319","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"2949_CR31","doi-asserted-by":"publisher","unstructured":"Park J, Woo S, Lee J-Y, Kweon IS (2018) BAM: bottleneck attention module. arXiv. https:\/\/doi.org\/10.48550\/arXiv.1807.06514","DOI":"10.48550\/arXiv.1807.06514"},{"key":"2949_CR32","doi-asserted-by":"crossref","unstructured":"Misra D, Nalamada T, Arasanipalai AU, Hou Q (2021) Rotate to attend: convolutional triplet attention module. In: Proceedings of the IEEE\/CVF winter conference on applications of computer vision, pp 3139\u20133148","DOI":"10.1109\/WACV48630.2021.00318"},{"issue":"1","key":"2949_CR33","doi-asserted-by":"publisher","first-page":"197","DOI":"10.1007\/s00530-022-00981-z","volume":"29","author":"S Liu","year":"2023","unstructured":"Liu S, He N, Wang C, Yu H, Han W (2023) Lightweight human pose estimation algorithm based on polarized self-attention. Multimedia Syst 29(1):197\u2013210. https:\/\/doi.org\/10.1007\/s00530-022-00981-z","journal-title":"Multimedia Syst"},{"key":"2949_CR34","doi-asserted-by":"crossref","unstructured":"Sun K, Xiao B, Liu D, Wang J (2019) Deep high-resolution representation learning for human pose estimation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 5693\u20135703","DOI":"10.1109\/CVPR.2019.00584"},{"key":"2949_CR35","doi-asserted-by":"crossref","unstructured":"Ke L, Chang M-C, Qi H, Lyu S (2018) Multi-scale structure-aware network for human pose estimation. In: Proceedings of the European conference on computer vision (ECCV), pp 713\u2013728","DOI":"10.1007\/978-3-030-01216-8_44"},{"key":"2949_CR36","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TGRS.2022.3179379","volume":"60","author":"P He","year":"2022","unstructured":"He P, Jiao L, Shang R, Wang S, Liu X, Quan D, Yang K, Zhao D (2022) MANet: multi-scale aware-relation network for semantic segmentation in aerial scenes. IEEE Trans Geosci Remote Sens 60:1\u201315. https:\/\/doi.org\/10.1109\/TGRS.2022.3179379","journal-title":"IEEE Trans Geosci Remote Sens"},{"key":"2949_CR37","doi-asserted-by":"crossref","unstructured":"Fu J, Liu J, Tian H, Li Y, Bao Y, Fang Z, Lu H (2019) Dual attention network for scene segmentation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 3146\u20133154","DOI":"10.1109\/CVPR.2019.00326"},{"key":"2949_CR38","doi-asserted-by":"publisher","unstructured":"Lin T-Y, Maire M, Belongie S, Hays J, Perona P, Ramanan D, Doll\u00e1r P, Zitnick CL (2014) Microsoft COCO: common objects in context. In: Fleet D, Pajdla T, Schiele B, Tuytelaars T (eds.) Computer vision\u2014ECCV 2014, vol 8693. Springer, Cham, pp 740\u2013755. https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"2949_CR39","doi-asserted-by":"crossref","unstructured":"Li J, Wang C, Zhu H, Mao Y, Fang H-S, Lu C (2019) CrowdPose: efficient crowded scenes pose estimation and a new benchmark. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 10863\u201310872","DOI":"10.1109\/CVPR.2019.01112"},{"key":"2949_CR40","doi-asserted-by":"crossref","unstructured":"He K, Gkioxari G, Dollar P, Girshick R (2017) Mask R-CNN. In: Proceedings of the IEEE international conference on computer vision, pp 2961\u20132969","DOI":"10.1109\/ICCV.2017.322"},{"key":"2949_CR41","doi-asserted-by":"crossref","unstructured":"Li K, Wang S, Zhang X, Xu Y, Xu W, Tu Z (2021) Pose recognition with cascade transformers. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 1944\u20131953","DOI":"10.1109\/CVPR46437.2021.00198"},{"key":"2949_CR42","doi-asserted-by":"crossref","unstructured":"Lu P, Jiang T, Li Y, Li X, Chen K, Yang W (2024) RTMO: towards high-performance one-stage real-time multi-person pose estimation. arXiv","DOI":"10.1109\/CVPR52733.2024.00148"},{"key":"2949_CR43","doi-asserted-by":"crossref","unstructured":"Geng Z, Sun K, Xiao B, Zhang Z, Wang J (2021) Bottom-up human pose estimation via disentangled keypoint regression. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 14676\u201314686","DOI":"10.1109\/CVPR46437.2021.01444"},{"issue":"3","key":"2949_CR44","doi-asserted-by":"publisher","first-page":"2813","DOI":"10.1609\/aaai.v36i3.20185","volume":"36","author":"Y Xiao","year":"2022","unstructured":"Xiao Y, Wang XJ, Yu D, Wang G, Zhang Q, He M (2022) AdaptivePose: human parts as adaptive points. Proc AAAI Conf Artif Intell 36(3):2813\u20132821. https:\/\/doi.org\/10.1609\/aaai.v36i3.20185","journal-title":"Proc AAAI Conf Artif Intell"}],"container-title":["International Journal of Machine Learning and Cybernetics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-025-02949-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s13042-025-02949-7","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-025-02949-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T10:03:12Z","timestamp":1774519392000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s13042-025-02949-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2,16]]},"references-count":44,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2026,3]]}},"alternative-id":["2949"],"URL":"https:\/\/doi.org\/10.1007\/s13042-025-02949-7","relation":{},"ISSN":["1868-8071","1868-808X"],"issn-type":[{"value":"1868-8071","type":"print"},{"value":"1868-808X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,2,16]]},"assertion":[{"value":"7 November 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 October 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 February 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"95"}}