{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T14:49:15Z","timestamp":1774968555589,"version":"3.50.1"},"reference-count":52,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2024,5,31]],"date-time":"2024-05-31T00:00:00Z","timestamp":1717113600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,5,31]],"date-time":"2024-05-31T00:00:00Z","timestamp":1717113600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimedia Systems"],"published-print":{"date-parts":[[2024,6]]},"DOI":"10.1007\/s00530-024-01368-y","type":"journal-article","created":{"date-parts":[[2024,5,31]],"date-time":"2024-05-31T12:02:27Z","timestamp":1717156947000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["SD-Pose: facilitating space-decoupled human pose estimation via adaptive pose perception guidance"],"prefix":"10.1007","volume":"30","author":[{"given":"Zhi","family":"Liu","sequence":"first","affiliation":[]},{"given":"Shengzhao","family":"Hao","sequence":"additional","affiliation":[]},{"given":"Yunhua","family":"Lu","sequence":"additional","affiliation":[]},{"given":"Lei","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Cong","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Ruohuang","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,5,31]]},"reference":[{"key":"1368_CR1","doi-asserted-by":"crossref","unstructured":"Li, Y., Yang, S., Liu, P., et al.: SimCC: a simple coordinate classification perspective for human pose estimation. In: European Conference on Computer Vision, pp. 89\u2013106. Springer (2022)","DOI":"10.1007\/978-3-031-20068-7_6"},{"key":"1368_CR2","unstructured":"Jiang, T., Lu, P., Zhang, L., et al.: RTMPose: real-time multi-person pose estimation based on mmpose. arXiv preprint arXiv:2303.07399 (2023)"},{"key":"1368_CR3","unstructured":"Yuan, Y., Fu, R., Huang, L., et al.: HRFormer: high-resolution transformer for dense prediction. arXiv preprint arXiv:2110.09408 (2021)"},{"key":"1368_CR4","doi-asserted-by":"crossref","unstructured":"Li, Y., Zhang, S., Wang, Z., et al.: TokenPose: learning keypoint tokens for human pose estimation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 11313\u201311322 (2021)","DOI":"10.1109\/ICCV48922.2021.01112"},{"key":"1368_CR5","doi-asserted-by":"crossref","unstructured":"Yang, S., Quan, Z., Nie, M., et al.: Transpose: keypoint localization via transformer. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 11802\u201311812 (2021)","DOI":"10.1109\/ICCV48922.2021.01159"},{"key":"1368_CR6","doi-asserted-by":"publisher","first-page":"15949","DOI":"10.1109\/TPAMI.2023.3311447","volume":"45","author":"J Gao","year":"2023","unstructured":"Gao, J., Chen, M., Xu, C.: Vectorized evidential learning for weakly-supervised temporal action localization. IEEE Trans. Pattern Anal. Mach. Intell. 45, 15949\u201315963 (2023)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"10","key":"1368_CR7","doi-asserted-by":"publisher","first-page":"3476","DOI":"10.1109\/TPAMI.2020.2985708","volume":"43","author":"J Gao","year":"2020","unstructured":"Gao, J., Zhang, T., Xu, C.: Learning to model relationships for zero-shot video classification. IEEE Trans. Pattern Anal. Mach. Intell. 43(10), 3476\u20133491 (2020)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1368_CR8","doi-asserted-by":"crossref","unstructured":"Wang, W., Xie, E., Li, X., et al.: Pyramid vision transformer: a versatile backbone for dense prediction without convolutions. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 568\u2013578 (2021)","DOI":"10.1109\/ICCV48922.2021.00061"},{"issue":"3","key":"1368_CR9","doi-asserted-by":"publisher","first-page":"415","DOI":"10.1007\/s41095-022-0274-8","volume":"8","author":"W Wang","year":"2022","unstructured":"Wang, W., Xie, E., Li, X., et al.: PVT v2: improved baselines with pyramid vision transformer. Comput. Visual Media 8(3), 415\u2013424 (2022)","journal-title":"Comput. Visual Media"},{"key":"1368_CR10","doi-asserted-by":"crossref","unstructured":"Tompson, J., Goroshin, R., Jain, A., et al.: Efficient object localization using convolutional networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 648\u2013656 (2015)","DOI":"10.1109\/CVPR.2015.7298664"},{"key":"1368_CR11","doi-asserted-by":"crossref","unstructured":"Newell, A., Yang, K., Deng, J.: Stacked hourglass networks for human pose estimation. In: Computer Vision\u2013ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11\u201314, 2016, Proceedings, Part VIII 14, pp. 483\u2013499. Springer (2016)","DOI":"10.1007\/978-3-319-46484-8_29"},{"key":"1368_CR12","doi-asserted-by":"crossref","unstructured":"Chen, Y., Wang, Z., Peng, Y., et al.: Cascaded pyramid network for multi-person pose estimation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7103\u20137112 (2018)","DOI":"10.1109\/CVPR.2018.00742"},{"key":"1368_CR13","doi-asserted-by":"crossref","unstructured":"Lin, G., Milan, A., Shen, C., et al.: RefineNet: multi-path refinement networks for high-resolution semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1925\u20131934 (2017)","DOI":"10.1109\/CVPR.2017.549"},{"key":"1368_CR14","doi-asserted-by":"crossref","unstructured":"Xiao, B., Wu, H., Wei, Y.: Simple baselines for human pose estimation and tracking. In: Proceedings of the European conference on computer vision (ECCV), pp. 466\u2013481 (2018)","DOI":"10.1007\/978-3-030-01231-1_29"},{"key":"1368_CR15","doi-asserted-by":"crossref","unstructured":"Sun, K., Xiao, B., Liu, D., et al.: Deep high-resolution representation learning for human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5693\u20135703 (2019)","DOI":"10.1109\/CVPR.2019.00584"},{"key":"1368_CR16","doi-asserted-by":"crossref","unstructured":"Cheng, B., Xiao, B., Wang, J., et al.: HigherHRNet: scale-aware representation learning for bottom-up human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5386\u20135395 (2020)","DOI":"10.1109\/CVPR42600.2020.00543"},{"key":"1368_CR17","doi-asserted-by":"crossref","unstructured":"Artacho, B., Savakis, A.: UniPose: unified human pose estimation in single images and videos. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7035\u20137044 (2020)","DOI":"10.1109\/CVPR42600.2020.00706"},{"key":"1368_CR18","doi-asserted-by":"crossref","unstructured":"Huang, J., Zhu, Z., Guo, F., et al.: The devil is in the details: delving into unbiased data processing for human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5700\u20135709 (2020)","DOI":"10.1109\/CVPR42600.2020.00574"},{"key":"1368_CR19","doi-asserted-by":"crossref","unstructured":"Sun, X., Shang, J., Liang, S., et al.: Compositional human pose regression. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2602\u20132611 (2017)","DOI":"10.1109\/ICCV.2017.284"},{"key":"1368_CR20","doi-asserted-by":"crossref","unstructured":"Zhang, F., Zhu, X., Dai, H., et al.: Distribution-aware coordinate representation for human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7093\u20137102 (2020)","DOI":"10.1109\/CVPR42600.2020.00712"},{"key":"1368_CR21","doi-asserted-by":"crossref","unstructured":"Toshev, A., Szegedy, C.: DeepPose: human pose estimation via deep neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1653\u20131660 (2014)","DOI":"10.1109\/CVPR.2014.214"},{"key":"1368_CR22","doi-asserted-by":"crossref","unstructured":"Carreira, J., Agrawal, P., Fragkiadaki, K., et al.: Human pose estimation with iterative error feedback. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4733\u20134742 (2016)","DOI":"10.1109\/CVPR.2016.512"},{"key":"1368_CR23","doi-asserted-by":"crossref","unstructured":"Szegedy, C., Liu, W., Jia, Y., et al.: Going deeper with convolutions. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1\u20139 (2015)","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"1368_CR24","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1016\/j.cag.2019.09.002","volume":"85","author":"DC Luvizon","year":"2019","unstructured":"Luvizon, D.C., Tabia, H., Picard, D.: Human pose regression by combining indirect part detection and contextual information. Comput. Graph. 85, 15\u201322 (2019)","journal-title":"Comput. Graph."},{"key":"1368_CR25","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., et al.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"1368_CR26","doi-asserted-by":"crossref","unstructured":"Li, J., Bian, S., Zeng, A., et al.: Human pose regression with residual log-likelihood estimation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 11025\u201311034 (2021)","DOI":"10.1109\/ICCV48922.2021.01084"},{"key":"1368_CR27","doi-asserted-by":"crossref","unstructured":"Mao, W., Ge, Y., Shen, C., et al.: Poseur: direct human pose regression with transformers. In: European Conference on Computer Vision, pp. 72\u201388. Springer (2022)","DOI":"10.1007\/978-3-031-20068-7_5"},{"key":"1368_CR28","doi-asserted-by":"crossref","unstructured":"Hu, J., Shen, L., Sun, G.: Squeeze-and-excitation networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7132\u20137141 (2018)","DOI":"10.1109\/CVPR.2018.00745"},{"key":"1368_CR29","doi-asserted-by":"crossref","unstructured":"Hou, Q., Zhou, D., Feng, J.: Coordinate attention for efficient mobile network design. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13713\u201313722 (2021)","DOI":"10.1109\/CVPR46437.2021.01350"},{"key":"1368_CR30","doi-asserted-by":"crossref","unstructured":"Woo, S., Park, J., Lee, J.Y., et al.: CBAM: convolutional block attention module. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 3\u201319 (2018)","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"1368_CR31","doi-asserted-by":"crossref","unstructured":"Yang, Q., Shi, W., Chen, J., et al.: Localization of hard joints in human pose estimation based on residual down-sampling and attention mechanism. The Visual Computer, pp. 1\u201313 (2021)","DOI":"10.1007\/s00371-021-02122-5"},{"issue":"3","key":"1368_CR32","doi-asserted-by":"publisher","first-page":"1646","DOI":"10.1109\/TCSVT.2021.3075470","volume":"32","author":"J Gao","year":"2021","unstructured":"Gao, J., Xu, C.: Learning video moment retrieval without a single annotated video. IEEE Trans. Circuits Syst. Video Technol. 32(3), 1646\u20131657 (2021)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"1368_CR33","doi-asserted-by":"publisher","first-page":"5410","DOI":"10.1109\/TMM.2023.3333206","volume":"26","author":"Y Hu","year":"2023","unstructured":"Hu, Y., Gao, J., Dong, J., et al.: Exploring rich semantics for open-set action recognition. IEEE Trans. Multimedia 26, 5410\u20135421 (2023)","journal-title":"IEEE Trans. Multimedia"},{"key":"1368_CR34","doi-asserted-by":"crossref","unstructured":"Su, K., Yu, D., Xu, Z., et al.: Multi-person pose estimation with enhanced channel-wise and spatial information. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5674\u20135682 (2019)","DOI":"10.1109\/CVPR.2019.00582"},{"key":"1368_CR35","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"1368_CR36","first-page":"12077","volume":"34","author":"E Xie","year":"2021","unstructured":"Xie, E., Wang, W., Yu, Z., et al.: SegFormer: simple and efficient design for semantic segmentation with transformers. Adv. Neural Inf. Process. Syst. 34, 12077\u201312090 (2021)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"1368_CR37","unstructured":"Yang, B., Bender, G., Le, Q.V., et al.: CondConv: conditionally parameterized convolutions for efficient inference. Adv. Neural Inf. Process. Syst. 32 (2019)"},{"key":"1368_CR38","unstructured":"Li, C., Zhou, A., Yao, A.: Omni-dimensional dynamic convolution. arXiv preprint arXiv:2209.07947 (2022)"},{"key":"1368_CR39","unstructured":"Ramachandran, P., Zoph, B., Le, Q.V.: Searching for activation functions. arXiv preprint arXiv:1710.05941. (2017)"},{"key":"1368_CR40","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Maire, M., Belongie, S., et al.: Microsoft COCO: common objects in context. In: Computer Vision\u2013ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6\u201312, 2014, Proceedings, Part V 13, pp. 740\u2013755. Springer (2014)","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"1368_CR41","doi-asserted-by":"crossref","unstructured":"Andriluka, M., Pishchulin, L., Gehler, P., et al.: 2D human pose estimation: new benchmark and state of the art analysis. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3686\u20133693 (2014)","DOI":"10.1109\/CVPR.2014.471"},{"key":"1368_CR42","doi-asserted-by":"crossref","unstructured":"Li, K., Wang, Y., Zhang, J., et al.: UniFormer: unifying convolution and self-attention for visual recognition. IEEE Trans. Pattern Anal. Mach. Intell. (2023)","DOI":"10.1109\/TPAMI.2023.3282631"},{"key":"1368_CR43","doi-asserted-by":"crossref","unstructured":"Li, K., Wang, S., Zhang, X., et al.: Pose recognition with cascade transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1944\u20131953 (2021)","DOI":"10.1109\/CVPR46437.2021.00198"},{"key":"1368_CR44","doi-asserted-by":"crossref","unstructured":"Papandreou, G., Zhu, T., Kanazawa, N., et al.: Towards accurate multi-person pose estimation in the wild. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4903\u20134911 (2017)","DOI":"10.1109\/CVPR.2017.395"},{"key":"1368_CR45","doi-asserted-by":"crossref","unstructured":"Sun, X., Xiao, B., Wei, F., et al.: Integral human pose regression. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 529\u2013545 (2018)","DOI":"10.1007\/978-3-030-01231-1_33"},{"issue":"4","key":"1368_CR46","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3503464","volume":"18","author":"H Dai","year":"2022","unstructured":"Dai, H., Shi, H., Liu, W., et al.: Fasterpose: a faster simple baseline for human pose estimation. ACM Trans. Multimedia Comput. Commun. Appl. (TOMM) 18(4), 1\u201316 (2022)","journal-title":"ACM Trans. Multimedia Comput. Commun. Appl. (TOMM)"},{"key":"1368_CR47","unstructured":"Mao, W., Ge, Y., Shen, C., et al.: TFPose: direct human pose estimation with transformers. arXiv preprint arXiv:2103.15320 (2021)"},{"key":"1368_CR48","doi-asserted-by":"crossref","unstructured":"Fang, H.S., Xie, S., Tai, Y.W., et al.: RMPE: regional multi-person pose estimation. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2334\u20132343 (2017)","DOI":"10.1109\/ICCV.2017.256"},{"key":"1368_CR49","doi-asserted-by":"crossref","unstructured":"Ye, S., Zhang, Y., Hu, J., et al.: DistilPose: tokenized pose regression with heatmap distillation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2163\u20132172 (2023)","DOI":"10.1109\/CVPR52729.2023.00215"},{"key":"1368_CR50","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lin, Y., Cao, Y., et al.: Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"1368_CR51","doi-asserted-by":"crossref","unstructured":"Cai, Y., Wang, Z., Luo, Z., et al.: Learning delicate local representations for multi-person pose estimation. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part III 16, pp. 455\u2013472. Springer (2020)","DOI":"10.1007\/978-3-030-58580-8_27"},{"key":"1368_CR52","unstructured":"Lyu, C., Zhang, W., Huang, H., et al.: RTMDet: an empirical study of designing real-time object detectors. arXiv preprint arXiv:2212.07784 (2022)"}],"container-title":["Multimedia Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-024-01368-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00530-024-01368-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00530-024-01368-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,5]],"date-time":"2024-07-05T17:24:05Z","timestamp":1720200245000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00530-024-01368-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,31]]},"references-count":52,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2024,6]]}},"alternative-id":["1368"],"URL":"https:\/\/doi.org\/10.1007\/s00530-024-01368-y","relation":{},"ISSN":["0942-4962","1432-1882"],"issn-type":[{"value":"0942-4962","type":"print"},{"value":"1432-1882","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,5,31]]},"assertion":[{"value":"3 January 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 May 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"31 May 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"163"}}