{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T15:57:25Z","timestamp":1778083045001,"version":"3.51.4"},"reference-count":48,"publisher":"Springer Science and Business Media LLC","issue":"5-6","license":[{"start":{"date-parts":[[2024,12,24]],"date-time":"2024-12-24T00:00:00Z","timestamp":1734998400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,24]],"date-time":"2024-12-24T00:00:00Z","timestamp":1734998400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62173285"],"award-info":[{"award-number":["62173285"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Fujian Provincial Natural Science Foundation of China","award":["2021J011181,2022J011236"],"award-info":[{"award-number":["2021J011181,2022J011236"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int. J. Mach. Learn. 
& Cyber."],"published-print":{"date-parts":[[2025,6]]},"DOI":"10.1007\/s13042-024-02483-y","type":"journal-article","created":{"date-parts":[[2024,12,24]],"date-time":"2024-12-24T11:53:06Z","timestamp":1735041186000},"page":"3809-3817","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":23,"title":["A simple and efficient channel MLP on token for human pose estimation"],"prefix":"10.1007","volume":"16","author":[{"given":"Jianglong","family":"Huang","sequence":"first","affiliation":[]},{"given":"Chaoqun","family":"Hong","sequence":"additional","affiliation":[]},{"given":"Rongsheng","family":"Xie","sequence":"additional","affiliation":[]},{"given":"Lang","family":"Ran","sequence":"additional","affiliation":[]},{"given":"Jialong","family":"Qian","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,12,24]]},"reference":[{"key":"2483_CR1","doi-asserted-by":"crossref","unstructured":"Andriluka M, Pishchulin L, Gehler P, Schiele B (2014) 2d human pose estimation: New benchmark and state of the art analysis. In: Proceedings of the IEEE Conference on computer Vision and Pattern Recognition, pp 3686\u20133693","DOI":"10.1109\/CVPR.2014.471"},{"key":"2483_CR2","doi-asserted-by":"crossref","unstructured":"Cai Y, Wang Z, Luo Z, Yin B, Du A, Wang H, Sun J (2020) Learning delicate local representations for multi-person pose estimation. In: Computer vision-ECCV 2020: 16th European Conference, Glasgow, UK, August 23-28, 2020, Proceedings, Part III 16. Springer International Publishing, pp 455\u2013472","DOI":"10.1007\/978-3-030-58580-8_27"},{"key":"2483_CR3","doi-asserted-by":"crossref","unstructured":"Cao Z, Simon T, Wei S E, Sheikh Y (2017) Realtime multi-person 2d pose estimation using part affinity fields. 
In Proceedings of the IEEE conference on computer vision and pattern recognition, pp 7291\u20137299","DOI":"10.1109\/CVPR.2017.143"},{"key":"2483_CR4","doi-asserted-by":"crossref","unstructured":"Chen Y, Ma H, Wang J, Wu J, Wu X, Xie X (2021) PD-Net: quantitative motor function evaluation for Parkinson\u2019s disease via automated hand gesture analysis. In: Proceedings of the 27th ACM SIGKDD conference on knowledge discovery and data mining, pp 2683\u20132691","DOI":"10.1145\/3447548.3467130"},{"key":"2483_CR5","doi-asserted-by":"crossref","unstructured":"Chen Y, Wang Z, Peng Y, Zhang Z, Yu G, Sun J (2018) Cascaded pyramid network for multi-person pose estimation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 7103\u20137112","DOI":"10.1109\/CVPR.2018.00742"},{"key":"2483_CR6","doi-asserted-by":"crossref","unstructured":"Das S, Sharma S, Dai R, Bremond F, Thonnat M (2020) Vpn: learning video-pose embedding for activities of daily living. In: Computer vision-ECCV 2020: 16th European Conference, Glasgow, UK, August 23-28, 2020, Proceedings, Part IX 16. Springer International Publishing, pp 72\u201390","DOI":"10.1007\/978-3-030-58545-7_5"},{"key":"2483_CR7","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Zhai X, Unterthiner T, Houlsby N (2020) An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929"},{"key":"2483_CR8","doi-asserted-by":"crossref","unstructured":"Fang H S, Xie S, Tai Y W, Lu C (2017) Rmpe: regional multi-person pose estimation. In: Proceedings of the IEEE international conference on computer vision, pp 2334\u20132343","DOI":"10.1109\/ICCV.2017.256"},{"key":"2483_CR9","doi-asserted-by":"crossref","unstructured":"Fu J, Liu J, Tian H, Li Y, Bao Y, Fang Z, Lu H (2019) Dual attention network for scene segmentation. 
In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 3146\u20133154","DOI":"10.1109\/CVPR.2019.00326"},{"key":"2483_CR10","unstructured":"Hendrycks D, Gimpel K (2016). Gaussian error linear units (gelus). arXiv preprint arXiv:1606.08415"},{"key":"2483_CR11","doi-asserted-by":"crossref","unstructured":"Hu J, Shen L, Sun G (2018) Squeeze-and-excitation networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 7132\u20137141","DOI":"10.1109\/CVPR.2018.00745"},{"issue":"4","key":"2483_CR12","doi-asserted-by":"publisher","first-page":"541","DOI":"10.1162\/neco.1989.1.4.541","volume":"1","author":"Y LeCun","year":"1989","unstructured":"LeCun Y, Boser B, Denker JS, Henderson D, Howard RE, Hubbard W, Jackel LD (1989) Backpropagation applied to handwritten zip code recognition. Neural computation 1(4):541\u2013551","journal-title":"Neural computation"},{"key":"2483_CR13","doi-asserted-by":"crossref","unstructured":"Li K, Wang S, Zhang X, Xu Y, Xu W, & Tu Z (2021). Pose recognition with cascade transformers. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 1944-1953)","DOI":"10.1109\/CVPR46437.2021.00198"},{"key":"2483_CR14","doi-asserted-by":"crossref","unstructured":"Li X, Wang W, Hu X, Yang J (2019) Selective kernel networks. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 510\u2013519","DOI":"10.1109\/CVPR.2019.00060"},{"key":"2483_CR15","doi-asserted-by":"crossref","unstructured":"Li Y, Zhang S, Wang Z, Yang S, Yang W, Xia S T, Zhou E (2021) Tokenpose: learning keypoint tokens for human pose estimation. In: Proceedings of the IEEE\/CVF International conference on computer vision, pp 11313\u201311322","DOI":"10.1109\/ICCV48922.2021.01112"},{"key":"2483_CR16","doi-asserted-by":"crossref","unstructured":"Lin K, Wang L, Liu Z (2021) End-to-end human pose and mesh reconstruction with transformers. 
In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 1954\u20131963","DOI":"10.1109\/CVPR46437.2021.00199"},{"key":"2483_CR17","doi-asserted-by":"crossref","unstructured":"Lin T Y, Maire M, Belongie S, Hays J, Perona P, Ramanan D, Zitnick C L (2014) Microsoft coco: common objects in context. In: Computer Vision-ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part V 13. Springer International Publishing, pp 740\u2013755","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"2483_CR18","unstructured":"Ma H, Chen L, Kong D, Wang Z, Liu X, Tang H, Xie X (2021) Transfusion: Cross-view fusion with transformer for 3d human pose estimation. arXiv preprint arXiv:2110.09554"},{"key":"2483_CR19","doi-asserted-by":"crossref","unstructured":"Ma H, Wang Z, Chen Y, Kong D, Chen L, Liu X, Xie X (2022) Ppt: token-pruned pose transformer for monocular and multi-view human pose estimation. In: European conference on computer vision. Springer Nature Switzerland, Cham, pp 424\u2013442","DOI":"10.1007\/978-3-031-20065-6_25"},{"key":"2483_CR20","doi-asserted-by":"crossref","unstructured":"Mao W, Ge Y, Shen C, Tian Z, Wang X, Wang Z, den Hengel A V (2022) Poseur: Direct human pose regression with transformers. In: European conference on computer vision. Springer Nature Switzerland, Cham, pp 72\u201388","DOI":"10.1007\/978-3-031-20068-7_5"},{"issue":"4","key":"2483_CR21","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3072959.3073596","volume":"36","author":"D Mehta","year":"2017","unstructured":"Mehta D, Sridhar S, Sotnychenko O, Rhodin H, Shafiei M, Seidel HP, Theobalt C (2017) Vnect: real-time 3d human pose estimation with a single rgb camera. Acm Trans Graph (tog) 36(4):1\u201314","journal-title":"Acm Trans Graph (tog)"},{"key":"2483_CR22","unstructured":"Nair V, Hinton GE (2010) Rectified linear units improve restricted boltzmann machines. 
In: Proceedings of the 27th international conference on machine learning (ICML-10), pp 807\u2013814"},{"key":"2483_CR23","doi-asserted-by":"crossref","unstructured":"Newell A, Yang K, Deng J (2016) Stacked hourglass networks for human pose estimation. In: Computer vision-ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11\u201314, 2016, Proceedings, Part VIII 14. Springer International Publishing, pp 483\u2013499","DOI":"10.1007\/978-3-319-46484-8_29"},{"key":"2483_CR24","doi-asserted-by":"crossref","unstructured":"Papandreou G, Zhu T, Kanazawa N, Toshev A, Tompson J, Bregler C, Murphy K (2017) Towards accurate multi-person pose estimation in the wild. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4903\u20134911","DOI":"10.1109\/CVPR.2017.395"},{"key":"2483_CR25","doi-asserted-by":"crossref","unstructured":"Rafi U, Doering A, Leibe B, Gall J (2020) Self-supervised keypoint correspondences for multi-person pose estimation and tracking in videos. In: Computer Vision-ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XX 16. Springer International Publishing, pp 36\u201352","DOI":"10.1007\/978-3-030-58565-5_3"},{"key":"2483_CR26","doi-asserted-by":"crossref","unstructured":"Sun K, Xiao B, Liu D, Wang J (2019). Deep high-resolution representation learning for human pose estimation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 5693\u20135703","DOI":"10.1109\/CVPR.2019.00584"},{"key":"2483_CR27","doi-asserted-by":"crossref","unstructured":"Sun X, Xiao B, Wei F, Liang S, Wei Y (2018) Integral human pose regression. In Proceedings of the European conference on computer vision (ECCV), pp 529\u2013545","DOI":"10.1007\/978-3-030-01231-1_33"},{"key":"2483_CR28","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez A N, Polosukhin I (2017) Attention is all you need. 
Adv Neural Inf Process Syst 30"},{"key":"2483_CR29","doi-asserted-by":"crossref","unstructured":"Wang Y, Li M, Cai H, Chen W M, Han S (2022) Lite pose: efficient architecture design for 2d human pose estimation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 13126\u201313136","DOI":"10.1109\/CVPR52688.2022.01278"},{"key":"2483_CR30","doi-asserted-by":"crossref","unstructured":"Wang Z, Shin D, Fowlkes CC (2020) Predicting camera viewpoint improves cross-dataset generalization for 3d human pose estimation. In: Computer vision-ECCV 2020 workshops: Glasgow, UK, August 23-28, 2020, Proceedings, Part II 16. Springer International Publishing, pp 523\u2013540","DOI":"10.1007\/978-3-030-66096-3_36"},{"key":"2483_CR31","doi-asserted-by":"crossref","unstructured":"Wang Z, Yang J, Fowlkes C (2022) The best of both worlds: combining model-based and nonparametric approaches for 3d human body estimation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 2318\u20132327","DOI":"10.1109\/CVPRW56347.2022.00258"},{"key":"2483_CR32","doi-asserted-by":"crossref","unstructured":"Xiao B, Wu H, Wei Y (2018) Simple baselines for human pose estimation and tracking. In: Proceedings of the European conference on computer vision (ECCV), pp 466\u2013481","DOI":"10.1007\/978-3-030-01231-1_29"},{"key":"2483_CR33","doi-asserted-by":"crossref","unstructured":"Yang S, Quan Z, Nie M, Yang W (2021) Transpose: Keypoint localization via transformer. In: Proceedings of the IEEE\/CVF International conference on computer vision, pp 11802\u201311812","DOI":"10.1109\/ICCV48922.2021.01159"},{"key":"2483_CR34","doi-asserted-by":"crossref","unstructured":"Yuan L, Chen Y, Wang T, Yu W, Shi Y, Jiang Z H, Yan S (2021) Tokens-to-token vit: Training vision transformers from scratch on imagenet. 
In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 558\u2013567","DOI":"10.1109\/ICCV48922.2021.00060"},{"key":"2483_CR35","doi-asserted-by":"crossref","unstructured":"Zhao S, Liu K, Huang Y, Bao Q, Zeng D, Liu W (2022) DPIT: dual-pipeline Integrated transformer for human pose estimation. In: CAAI International conference on artificial intelligence. Springer Nature Switzerland, Cham, pp 559\u2013576","DOI":"10.1007\/978-3-031-20500-2_46"},{"key":"2483_CR36","doi-asserted-by":"crossref","unstructured":"Toshev A, Szegedy C (2014) Deeppose: human pose estimation via deep neural networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1653\u20131660","DOI":"10.1109\/CVPR.2014.214"},{"key":"2483_CR37","unstructured":"Zhang F, Zhu X, Wang C (2021) Single person pose estimation: a survey. arXiv preprint arXiv:2109.10056"},{"key":"2483_CR38","doi-asserted-by":"crossref","unstructured":"Liu R, Liu Y, Xin W, Miao Q, Li L (2024) Action jitter killer: joint noise optimization cascade for skeleton-based action recognition. IEEE Trans Instrum Meas","DOI":"10.1109\/TIM.2024.3370958"},{"key":"2483_CR39","doi-asserted-by":"crossref","unstructured":"Xin W, Miao Q, Liu Y, Liu R, Pun C M, Shi C (2023). Skeleton mixformer: Multivariate topology representation for skeleton-based action recognition. In Proceedings of the 31st ACM International Conference on Multimedia (pp. 2211-2220)","DOI":"10.1145\/3581783.3611900"},{"key":"2483_CR40","doi-asserted-by":"crossref","unstructured":"Xin W, Liu Y, Liu R, Miao Q, Shi C, Pun C M (2023) Auto-learning-GCN: an ingenious framework for skeleton-based action recognition. In: Chinese conference on pattern recognition and computer vision (PRCV). 
Springer Nature Singapore, Singapore, pp 29\u201342","DOI":"10.1007\/978-981-99-8429-9_3"},{"key":"2483_CR41","doi-asserted-by":"publisher","first-page":"164","DOI":"10.1016\/j.neucom.2023.03.001","volume":"537","author":"W Xin","year":"2023","unstructured":"Xin W, Liu R, Liu Y, Chen Y, Yu W, Miao Q (2023) Transformer for skeleton-based action recognition: a review of recent advances. Neurocomputing 537:164\u2013186","journal-title":"Neurocomputing"},{"key":"2483_CR42","unstructured":"Zhou D, Yu Z, Xie E, Xiao C, Anandkumar A, Feng J, Alvarez J M (2022). Understanding the robustness in vision transformers. In: International conference on machine learning. PMLR, pp 27378\u201327394"},{"issue":"19","key":"2483_CR43","doi-asserted-by":"publisher","first-page":"4019","DOI":"10.3390\/electronics12194019","volume":"12","author":"X Wang","year":"2023","unstructured":"Wang X, Shi N, Wang G, Shao J, Zhao S (2023) A multi-channel parallel keypoint fusion framework for human pose estimation. Electronics 12(19):4019","journal-title":"Electronics"},{"key":"2483_CR44","doi-asserted-by":"crossref","unstructured":"Gu K, Yang L, Yao A (2021) Removing the bias of integral pose regression. In Proceedings of the IEEE\/CVF International conference on computer vision, pp 11067\u201311076","DOI":"10.1109\/ICCV48922.2021.01088"},{"key":"2483_CR45","doi-asserted-by":"crossref","unstructured":"Zhang, Min et al. (2022) Human pose estimation based on parallel atrous convolution and body structure constraints. 1 Jan: 5553\u20135563","DOI":"10.3233\/JIFS-212061"},{"key":"2483_CR46","doi-asserted-by":"crossref","unstructured":"Yang, Zhihui et al (2021) A Combined local and global structure module for human pose estimation. 1 Jan: 1913\u20131923","DOI":"10.3233\/JCM-215210"},{"key":"2483_CR47","doi-asserted-by":"publisher","unstructured":"Chen W, Sang H, Wang J et al (2024) WTGCN: wavelet transform graph convolution network for pedestrian trajectory prediction. 
Int J Mach Learn Cyber https:\/\/doi.org\/10.1007\/s13042-024-02258-5","DOI":"10.1007\/s13042-024-02258-5"},{"key":"2483_CR48","doi-asserted-by":"publisher","unstructured":"He C, Zhang J, Chen L et al (2024) Domain adaptation with optimized feature distribution for streamer action recognition in live video. Int J Mach Learn Cyber https:\/\/doi.org\/10.1007\/s13042-024-02174-8","DOI":"10.1007\/s13042-024-02174-8"}],"container-title":["International Journal of Machine Learning and Cybernetics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-024-02483-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s13042-024-02483-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-024-02483-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,7]],"date-time":"2025-06-07T04:30:35Z","timestamp":1749270635000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s13042-024-02483-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,24]]},"references-count":48,"journal-issue":{"issue":"5-6","published-print":{"date-parts":[[2025,6]]}},"alternative-id":["2483"],"URL":"https:\/\/doi.org\/10.1007\/s13042-024-02483-y","relation":{},"ISSN":["1868-8071","1868-808X"],"issn-type":[{"value":"1868-8071","type":"print"},{"value":"1868-808X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,12,24]]},"assertion":[{"value":"28 June 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 November 
2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 December 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}