{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T06:01:17Z","timestamp":1773813677917,"version":"3.50.1"},"reference-count":46,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100010255","name":"Guangxi Key Laboratory of Automatic Detection Technology and Instrument Foundation","doi-asserted-by":"publisher","award":["YQ24204"],"award-info":[{"award-number":["YQ24204"]}],"id":[{"id":"10.13039\/501100010255","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004607","name":"Natural Science Foundation of Guangxi Province","doi-asserted-by":"publisher","award":["2024GXNSFAA010524"],"award-info":[{"award-number":["2024GXNSFAA010524"]}],"id":[{"id":"10.13039\/501100004607","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100012547","name":"Natural Science Foundation of Guangxi Zhuang Autonomous Region","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100012547","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62462005"],"award-info":[{"award-number":["62462005"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Digital Signal Processing"],"published-print":{"date-parts":[[2026,1]]},"DOI":"10.1016\/j.dsp.2025.105668","type":"journal-article","created":{"date-parts":[[2025,10,23]],"date-time":"2025-10-23T01:53:58Z","timestamp":1761184438000},"page":"105668","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"PE","title":["TransPose++: Enhanced keypoint localization via multi-scale feature fusion and efficient convolutional transformers"],"prefix":"10.1016","volume":"168","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7989-8543","authenticated-orcid":false,"given":"Hai-Sheng","family":"Li","sequence":"first","affiliation":[]},{"given":"Cuijuan","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Shuxiang","family":"Song","sequence":"additional","affiliation":[]},{"given":"Cong","family":"Hu","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.dsp.2025.105668_bib0001","article-title":"Real-Time Multi-Camera 3D Human Pose Estimation at the Edge for Industrial Applications","volume":"252","author":"Boldo","year":"2024","journal-title":"Expert Systems. Appli"},{"issue":"3","key":"10.1016\/j.dsp.2025.105668_bib0002","doi-asserted-by":"crossref","first-page":"1195","DOI":"10.1109\/TAFFC.2020.2981446","article-title":"Deep facial expression recognition: a survey","volume":"13","author":"Li","year":"2022","journal-title":"IEEE Trans. Affect. Comput."},{"key":"10.1016\/j.dsp.2025.105668_bib0003","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"3334","article-title":"Panoptic studio: a massively multiview system for social motion capture","author":"Joo","year":"2015"},{"issue":"6","key":"10.1016\/j.dsp.2025.105668_bib0004","doi-asserted-by":"crossref","first-page":"1317","DOI":"10.1109\/TPAMI.2019.2899570","article-title":"End-to-end active object tracking and its real-world deployment via reinforcement learning","volume":"42","author":"Luo","year":"2019","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"12","key":"10.1016\/j.dsp.2025.105668_bib0005","doi-asserted-by":"crossref","first-page":"2633","DOI":"10.1109\/TVCG.2015.2513408","article-title":"Pose estimation for augmented reality: a hands-on survey","volume":"22","author":"Marchand","year":"2015","journal-title":"IEEE Trans. Vis. Comput. Graph."},{"key":"10.1016\/j.dsp.2025.105668_bib0006","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"5686","article-title":"Deep high-resolution representation learning for human pose estimation","author":"Wang","year":"2019"},{"key":"10.1016\/j.dsp.2025.105668_bib0007","series-title":"Technical Report","article-title":"An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale","author":"Dosovitskiy","year":"2020"},{"key":"10.1016\/j.dsp.2025.105668_bib0008","doi-asserted-by":"crossref","DOI":"10.1016\/j.dsp.2023.104219","article-title":"Multi-order spatial interaction network for human pose estimation","volume":"142","author":"Wang","year":"2023","journal-title":"Digit. Signal Process."},{"key":"10.1016\/j.dsp.2025.105668_bib0009","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV)","first-page":"11188","article-title":"TransPose: keypoint localization via transformer","author":"Yang","year":"2021"},{"key":"10.1016\/j.dsp.2025.105668_bib0010","series-title":"Technical Report","article-title":"Multi-Scale Context Aggregation by Dilated convolutions","author":"Yu","year":"2015"},{"key":"10.1016\/j.dsp.2025.105668_bib0011","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"2117","article-title":"Feature pyramid networks for object detection","author":"Lin","year":"2017"},{"key":"10.1016\/j.dsp.2025.105668_bib0012","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"1251","article-title":"Xception: deep learning with depthwise separable convolutions","author":"Chollet","year":"2017"},{"key":"10.1016\/j.dsp.2025.105668_bib0013","series-title":"Technical Report","article-title":"Distilling the knowledge in a neural network","author":"Hinton","year":"2015"},{"key":"10.1016\/j.dsp.2025.105668_bib0014","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV)","first-page":"5836","article-title":"Net: keypoint detection by handcrafted and learned CNN filters","author":"Barroso-Laguna","year":"2019"},{"key":"10.1016\/j.dsp.2025.105668_bib0015","series-title":"Proceedings of the European Conference on Computer Vision (ECCV)","first-page":"483","article-title":"Stacked hourglass networks for human pose estimation","author":"Newell","year":"2016"},{"key":"10.1016\/j.dsp.2025.105668_bib0016","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"7103","article-title":"Cascaded pyramid network for multi-person pose estimation","author":"Chen","year":"2018"},{"key":"10.1016\/j.dsp.2025.105668_bib0017","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"2881","article-title":"Pyramid scene parsing network","author":"Zhao","year":"2017"},{"key":"10.1016\/j.dsp.2025.105668_bib0018","series-title":"Technical Report","article-title":"Rethinking Atrous Convolution for Semantic Image Segmentation","author":"Chen","year":"2017"},{"issue":"9","key":"10.1016\/j.dsp.2025.105668_bib0019","doi-asserted-by":"crossref","first-page":"1734","DOI":"10.1109\/TPAMI.2015.2496141","article-title":"Discriminative unsupervised feature learning with exemplar convolutional neural networks","volume":"38","author":"Dosovitskiy","year":"2016","journal-title":"IEEE TPAMI"},{"key":"10.1016\/j.dsp.2025.105668_bib0020","series-title":"Proceedings of the European Conference on Computer Vision (ECCV)","first-page":"213","article-title":"End-to-end object detection with transformers","author":"Carion","year":"2020"},{"key":"10.1016\/j.dsp.2025.105668_bib0021","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV)","first-page":"11313","article-title":"Learning keypoint tokens for human pose estimation","author":"Li","year":"2021"},{"key":"10.1016\/j.dsp.2025.105668_bib0022","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV)","first-page":"14915","article-title":"HRFormer: High-resolution transformer for human pose estimation","author":"Cheng","year":"2021"},{"issue":"4","key":"10.1016\/j.dsp.2025.105668_bib0023","doi-asserted-by":"crossref","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","article-title":"Deeplab: semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected CRFs","volume":"40","author":"Chen","year":"2017","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.dsp.2025.105668_bib0024","series-title":"Proceedings of the 2019 IEEE International Conference on Image Processing","first-page":"1440","article-title":"ACNet: attention based network to exploit complementary features for RGBD semantic segmentation","author":"Hu","year":"2019"},{"key":"10.1016\/j.dsp.2025.105668_bib0025","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"6848","article-title":"MobileNets: efficient convolutional neural networks for mobile vision applications","author":"Howard","year":"2017"},{"key":"10.1016\/j.dsp.2025.105668_bib0026","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"1580","article-title":"GhostNet: more features from cheap operations","author":"Han","year":"2020"},{"key":"10.1016\/j.dsp.2025.105668_bib0027","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV)","first-page":"14116","article-title":"MobileViT: light-weight, general-purpose vision transformer for mobile devices","author":"Mehta","year":"2021"},{"key":"10.1016\/j.dsp.2025.105668_bib0028","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"1251","article-title":"Xception: deep learning with depthwise separable convolutions","author":"Chollet","year":"2017"},{"key":"10.1016\/j.dsp.2025.105668_bib0029","series-title":"Technical Report","article-title":"MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications","author":"Howard","year":"2017"},{"key":"10.1016\/j.dsp.2025.105668_bib0030","series-title":"Proceedings of the European Conference on Computer Vision (ECCV)","first-page":"3","article-title":"CBAM: convolutional block attention module","author":"Woo","year":"2018"},{"key":"10.1016\/j.dsp.2025.105668_bib0031","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"11065","article-title":"Second-order attention network for single image super-resolution","author":"Dai","year":"2019"},{"key":"10.1016\/j.dsp.2025.105668_bib0032","series-title":"Technical Report","article-title":"LocalViT: Bringing Locality to Vision Transformers","author":"Li","year":"2021"},{"key":"10.1016\/j.dsp.2025.105668_bib0033","first-page":"4510","article-title":"Inverted residuals and linear bottlenecks","volume":"2","author":"Sandler","year":"2018","journal-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)"},{"issue":"1","key":"10.1016\/j.dsp.2025.105668_bib0034","first-page":"1929","article-title":"Dropout: a simple way to prevent neural networks from overfitting","volume":"15","author":"Srivastava","year":"2014","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.dsp.2025.105668_bib0035","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"1","article-title":"Going deeper with convolutions","author":"Szegedy","year":"2015"},{"issue":"4","key":"10.1016\/j.dsp.2025.105668_bib0036","doi-asserted-by":"crossref","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","article-title":"DeepLab: semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected CRFs","volume":"40","author":"Chen","year":"2018","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.dsp.2025.105668_bib0037","series-title":"European Conference on Computer Vision (ECCV)","first-page":"740","article-title":"Common objects in context","author":"Lin","year":"2014"},{"key":"10.1016\/j.dsp.2025.105668_bib0038","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"3686","article-title":"2D Human pose estimation: new benchmark and state of the art analysis","author":"Andriluka","year":"2014"},{"key":"10.1016\/j.dsp.2025.105668_bib0039","series-title":"Technical Report","article-title":"Fixing Weight Decay Regularization in Adam","author":"Loshchilov","year":"2017"},{"key":"10.1016\/j.dsp.2025.105668_bib0040","series-title":"Proceedings of the European Conference on Computer Vision (ECCV)","first-page":"466","article-title":"Simple baselines for human pose estimation and tracking","author":"Xiao","year":"2018"},{"issue":"3","key":"10.1016\/j.dsp.2025.105668_bib0041","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1007\/s00138-022-01352-4","article-title":"Human pose estimation based on lightweight basicblock","volume":"34","author":"Li","year":"2023","journal-title":"Mach. Vis. Appl."},{"issue":"5","key":"10.1016\/j.dsp.2025.105668_bib0042","doi-asserted-by":"crossref","first-page":"6169","DOI":"10.1007\/s11227-023-05691-5","article-title":"IDPNet: a light-weight network and its variants for human pose estimation","volume":"80","author":"Liu","year":"2024","journal-title":"J. Supercomput."},{"issue":"12","key":"10.1016\/j.dsp.2025.105668_bib0043","doi-asserted-by":"crossref","first-page":"17269","DOI":"10.1007\/s11227-024-06125-6","article-title":"Multi-scale high-resolution network for human pose estimation","volume":"80","author":"Wang","year":"2024","journal-title":"J. Supercomput."},{"key":"10.1016\/j.dsp.2025.105668_bib0044","series-title":"Advances in Neural Information Processing Systems (NeurIPS)","first-page":"38571","article-title":"Simple vision transformer baselines for human pose estimation","author":"Xu","year":"2022"},{"key":"10.1016\/j.dsp.2025.105668_bib0045","unstructured":"T. Jiang, P. Lu, L. Zhang, N. Ma, R. Han, C. Lyu, Y. Li, K. Chen, Rtmpose, Real-time multi-person pose estimation based on MMPose, Technical Report, arXiv preprint, 2023."},{"issue":"28","key":"10.1016\/j.dsp.2025.105668_bib0046","article-title":"HRPVT: High-Resolution pyramid vision transformer for medium and small-scale human pose estimation","volume":"619","author":"Xu","year":"2025","journal-title":"Neurocomputing"}],"container-title":["Digital Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1051200425006906?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1051200425006906?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T05:03:42Z","timestamp":1773810222000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1051200425006906"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1]]},"references-count":46,"alternative-id":["S1051200425006906"],"URL":"https:\/\/doi.org\/10.1016\/j.dsp.2025.105668","relation":{},"ISSN":["1051-2004"],"issn-type":[{"value":"1051-2004","type":"print"}],"subject":[],"published":{"date-parts":[[2026,1]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"TransPose++: Enhanced keypoint localization via multi-scale feature fusion and efficient convolutional transformers","name":"articletitle","label":"Article Title"},{"value":"Digital Signal Processing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.dsp.2025.105668","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2025 Elsevier Inc. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"105668"}}