{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T17:05:40Z","timestamp":1772643940376,"version":"3.50.1"},"reference-count":46,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T00:00:00Z","timestamp":1766188800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T00:00:00Z","timestamp":1766188800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Vis Comput"],"published-print":{"date-parts":[[2026,1]]},"DOI":"10.1007\/s00371-025-04216-w","type":"journal-article","created":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T14:44:37Z","timestamp":1766241877000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Structure-aware transformer for enhanced low-resolution human pose estimation"],"prefix":"10.1007","volume":"42","author":[{"given":"Jiancong","family":"Liang","sequence":"first","affiliation":[]},{"given":"Yanmin","family":"Luo","sequence":"additional","affiliation":[]},{"given":"Hongwei","family":"Lin","sequence":"additional","affiliation":[]},{"given":"Ye","family":"Lin","sequence":"additional","affiliation":[]},{"given":"Jing-Ming","family":"Guo","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,12,20]]},"reference":[{"key":"4216_CR1","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2022.104452","volume":"123","author":"G Bai","year":"2022","unstructured":"Bai, G., Luo, Y., Pan, X., Wang, Y., Wang, J., Guo, J.-M.: Double chain networks for monocular 3d human pose estimation. Image Vis. Comput. 123, 104452 (2022)","journal-title":"Image Vis. Comput."},{"issue":"3","key":"4216_CR2","doi-asserted-by":"publisher","first-page":"964","DOI":"10.1109\/TPAMI.2019.2940446","volume":"43","author":"X Liu","year":"2019","unstructured":"Liu, X., Hu, Z., Ling, H., Cheung, Y.-M.: Mtfh: a matrix tri-factorization hashing framework for efficient cross-modal retrieval. IEEE Trans. Pattern Anal. Mach. Intell. 43(3), 964\u2013981 (2019)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"4216_CR3","doi-asserted-by":"crossref","unstructured":"Wei, S.-E., Ramakrishna, V., Kanade, T., Sheikh, Y.: Convolutional pose machines, in: Proceedings of the IEEE conference on Computer Vision and Pattern Recognition, 4724\u20134732 (2016)","DOI":"10.1109\/CVPR.2016.511"},{"key":"4216_CR4","doi-asserted-by":"crossref","unstructured":"Sun, K., Xiao, B., Liu, D., Wang, J.: Deep high-resolution representation learning for human pose estimation, in: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, 5693\u20135703 (2019)","DOI":"10.1109\/CVPR.2019.00584"},{"key":"4216_CR5","doi-asserted-by":"crossref","unstructured":"Cai, Y., Wang, Z., Luo, Z., Yin, B., Du, A., Wang, H., Zhang, X., Zhou, X., Zhou, E., Sun, J.: Learning delicate local representations for multi-person pose estimation, in: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part III 16, Springer, 455\u2013472 (2020)","DOI":"10.1007\/978-3-030-58580-8_27"},{"key":"4216_CR6","doi-asserted-by":"crossref","unstructured":"Iqbal, U., Gall, J.: Multi-person pose estimation with local joint-to-person associations, in: Computer Vision\u2013ECCV 2016 Workshops: Amsterdam, The Netherlands, October 8-10 and 15-16, 2016, Proceedings, Part II 14, Springer, 2016, pp. 627\u2013642","DOI":"10.1007\/978-3-319-48881-3_44"},{"key":"4216_CR7","doi-asserted-by":"crossref","unstructured":"Fang, H.-S., Xie, S., Tai, Y.-W., Lu, C.: Rmpe: Regional multi-person pose estimation, in: Proceedings of the IEEE international conference on computer vision, 2334\u20132343 (2017)","DOI":"10.1109\/ICCV.2017.256"},{"key":"4216_CR8","doi-asserted-by":"crossref","unstructured":"Chen, Y., Wang, Z., Peng, Y., Zhang, Z., Yu, G., Sun, J.: Cascaded pyramid network for multi-person pose estimation, in: Proceedings of the IEEE conference on computer vision and pattern recognition, 7103\u20137112 (2018)","DOI":"10.1109\/CVPR.2018.00742"},{"key":"4216_CR9","doi-asserted-by":"crossref","unstructured":"Li, Y., Zhang, S., Wang, Z., Yang, S., Yang, W., Xia, S.-T., Zhou, E.: Tokenpose: Learning keypoint tokens for human pose estimation, in: Proceedings of the IEEE\/CVF International conference on computer vision, 11313\u201311322 (2021)","DOI":"10.1109\/ICCV48922.2021.01112"},{"key":"4216_CR10","doi-asserted-by":"crossref","unstructured":"Yang, S., Quan, Z., Nie, M., Yang, W.: Transpose: Keypoint localization via transformer, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 11802\u201311812 (2021)","DOI":"10.1109\/ICCV48922.2021.01159"},{"key":"4216_CR11","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2022.104390","volume":"119","author":"Y Luo","year":"2022","unstructured":"Luo, Y., Ou, Z., Wan, T., Guo, J.-M.: Fastnet: fast high-resolution network for human pose estimation. Image Vis. Comput. 119, 104390 (2022)","journal-title":"Image Vis. Comput."},{"key":"4216_CR12","doi-asserted-by":"crossref","unstructured":"Wang, Y.-J., Luo, Y.-M., Bai, G.-H., Guo, J.-M.: (2022) Uformpose: A u-shaped hierarchical multi-scale keypoint-aware framework for human pose estimation, IEEE Transactions on Circuits and Systems for Video Technology","DOI":"10.1109\/TCSVT.2022.3213206"},{"issue":"1","key":"4216_CR13","doi-asserted-by":"publisher","first-page":"119","DOI":"10.1007\/s00371-021-02317-w","volume":"39","author":"X Zhao","year":"2023","unstructured":"Zhao, X., Guo, C., Zou, Q.: Human pose estimation with gated multi-scale feature fusion and spatial mutual information. Vis. Comput. 39(1), 119\u2013137 (2023)","journal-title":"Vis. Comput."},{"key":"4216_CR14","doi-asserted-by":"crossref","unstructured":"Cao, Z., Simon, T., Wei, S.-E., Sheikh, Y.: Realtime multi-person 2d pose estimation using part affinity fields, in: Proceedings of the IEEE conference on computer vision and pattern recognition, 7291\u20137299 (2017)","DOI":"10.1109\/CVPR.2017.143"},{"key":"4216_CR15","doi-asserted-by":"crossref","unstructured":"Kreiss, S., Bertoni, L., Alahi, A.: Pifpaf: Composite fields for human pose estimation, in: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, 11977\u201311986 (2019)","DOI":"10.1109\/CVPR.2019.01225"},{"key":"4216_CR16","doi-asserted-by":"crossref","unstructured":"Jin, S., Liu, W., Xie, E., Wang, W., Qian, C., Ouyang, W., Luo, P.: Differentiable hierarchical graph grouping for multi-person pose estimation, in: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part VII 16, Springer, 718\u2013734 (2020)","DOI":"10.1007\/978-3-030-58571-6_42"},{"key":"4216_CR17","unstructured":"Newell, A., Huang, Z., Deng, J.: Associative embedding: End-to-end learning for joint detection and grouping, Advances in neural information processing systems 30 (2017)"},{"key":"4216_CR18","doi-asserted-by":"crossref","unstructured":"Cheng, B., Xiao, B., Wang, J., Shi, H., Huang, T.\u00a0S., Zhang, L.: Higherhrnet: Scale-aware representation learning for bottom-up human pose estimation, in: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, 5386\u20135395 (2020)","DOI":"10.1109\/CVPR42600.2020.00543"},{"key":"4216_CR19","doi-asserted-by":"crossref","unstructured":"Xue, N., Wu, T., Xia, G.-S., Zhang, L.: Learning local-global contextual adaptation for multi-person pose estimation, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 13065\u201313074 (2022)","DOI":"10.1109\/CVPR52688.2022.01272"},{"key":"4216_CR20","doi-asserted-by":"crossref","unstructured":"Luo, Z., Wang, Z., Huang, Y., Wang, L., Tan, T., Zhou, E.: Rethinking the heatmap regression for bottom-up human pose estimation, in: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, 13264\u201313273 (2021)","DOI":"10.1109\/CVPR46437.2021.01306"},{"key":"4216_CR21","doi-asserted-by":"crossref","unstructured":"Wang, Y., Li, M., Cai, H., Chen, W.-M., Han, S.: Lite pose: Efficient architecture design for 2d human pose estimation, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 13126\u201313136 (2022)","DOI":"10.1109\/CVPR52688.2022.01278"},{"key":"4216_CR22","first-page":"6278","volume":"34","author":"D Wang","year":"2021","unstructured":"Wang, D., Zhang, S., Hua, G.: Robust pose estimation in crowded scenes with direct pose-level inference. Adv. Neural. Inf. Process. Syst. 34, 6278\u20136289 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"4216_CR23","doi-asserted-by":"crossref","unstructured":"Geng, Z., Sun, K., Xiao, B., Zhang, Z., Wang, J.: Bottom-up human pose estimation via disentangled keypoint regression, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 14676\u201314686 (2021)","DOI":"10.1109\/CVPR46437.2021.01444"},{"key":"4216_CR24","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2021.104198","volume":"111","author":"Z Zhang","year":"2021","unstructured":"Zhang, Z., Luo, Y., Gou, J.: Double anchor embedding for accurate multi-person 2d pose estimation. Image Vis. Comput. 111, 104198 (2021)","journal-title":"Image Vis. Comput."},{"issue":"2","key":"4216_CR25","doi-asserted-by":"publisher","first-page":"551","DOI":"10.1007\/s11760-021-01999-y","volume":"16","author":"T Wan","year":"2022","unstructured":"Wan, T., Luo, Y., Zhang, Z., Ou, Z.: Tsnet: Tree structure network for human pose estimation. SIViP 16(2), 551\u2013558 (2022)","journal-title":"SIViP"},{"key":"4216_CR26","doi-asserted-by":"crossref","unstructured":"Wu, Y.-H., Zhang, S.-C., Liu, Y., Zhang, L., Zhan, X., Zhou, D., Feng, J., Cheng, M.-M., Zhen, L.: Low-resolution self-attention for semantic segmentation, IEEE Transactions on Pattern Analysis and Machine Intelligence (2025)","DOI":"10.1109\/TPAMI.2025.3577035"},{"issue":"11","key":"4216_CR27","doi-asserted-by":"publisher","first-page":"12760","DOI":"10.1109\/TPAMI.2022.3202765","volume":"45","author":"Y-H Wu","year":"2022","unstructured":"Wu, Y.-H., Liu, Y., Zhan, X., Cheng, M.-M.: P2t: Pyramid pooling transformer for scene understanding. IEEE Trans. Pattern Anal. Mach. Intell. 45(11), 12760\u201312771 (2022)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"4216_CR28","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., Zitnick, C.\u00a0L.: Microsoft coco: Common objects in context, in: Computer Vision\u2013ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part V 13, Springer, 740\u2013755 (2014)","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"4216_CR29","doi-asserted-by":"crossref","unstructured":"Li, J., Wang, C., Zhu, H., Mao, Y., Fang, H.-S., Lu, C.: Crowdpose: Efficient crowded scenes pose estimation and a new benchmark, in: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, 10863\u201310872 (2019)","DOI":"10.1109\/CVPR.2019.01112"},{"issue":"1","key":"4216_CR30","doi-asserted-by":"publisher","first-page":"691","DOI":"10.1007\/s11227-021-03889-z","volume":"78","author":"Z Ou","year":"2022","unstructured":"Ou, Z., Luo, Y., Chen, J., Chen, G.: Srfnet: selective receptive field network for human pose estimation. J. Supercomput. 78(1), 691\u2013711 (2022)","journal-title":"J. Supercomput."},{"key":"4216_CR31","doi-asserted-by":"crossref","unstructured":"Huang, J., Zhu, Z., Guo, F., Huang, G.: The devil is in the details: Delving into unbiased data processing for human pose estimation, in: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, 5700\u20135709 (2020)","DOI":"10.1109\/CVPR42600.2020.00574"},{"key":"4216_CR32","doi-asserted-by":"crossref","unstructured":"Xu, L., Guan, Y., Jin, S., Liu, W., Qian, C., Luo, P., Ouyang, W., Wang, X.: Vipnas: Efficient video pose estimation via neural architecture search, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 16072\u201316081 (2021)","DOI":"10.1109\/CVPR46437.2021.01581"},{"key":"4216_CR33","doi-asserted-by":"crossref","unstructured":"Bao, F., Li, C., Cao, Y., Zhu, J.: All are worth words: a vit backbone for score-based diffusion models, arXiv preprint arXiv:2209.12152 (2022)","DOI":"10.1109\/CVPR52729.2023.02171"},{"key":"4216_CR34","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lin, Y., Cao, Y., Hu, H., Wei, Y., Zhang, Z., Lin, S., Guo, B.: Swin transformer: Hierarchical vision transformer using shifted windows, in: Proceedings of the IEEE\/CVF international conference on computer vision, 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"4216_CR35","doi-asserted-by":"crossref","unstructured":"Si, C., Yu, W., Zhou, P., Zhou, Y., Wang, X., Yan, S.: Inception transformer, arXiv preprint arXiv:2205.12956 (2022)","DOI":"10.52202\/068431-1707"},{"key":"4216_CR36","unstructured":"Wei, Z., Pan, H., Niu, X., Li, D.: Convformer: Closing the gap between cnn and vision transformers, arXiv preprint arXiv:2209.07738 (2022)"},{"key":"4216_CR37","doi-asserted-by":"crossref","unstructured":"Xu, X., Gao, Y., Yan, K., Lin, X., Zou, Q.: Location-free human pose estimation, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 13137\u201313146 (2022)","DOI":"10.1109\/CVPR52688.2022.01279"},{"key":"4216_CR38","doi-asserted-by":"crossref","unstructured":"Shi, D., Wei, X., Li, L., Ren, Y., Tan, W.: End-to-end multi-person pose estimation with transformers, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 11069\u201311078 (2022)","DOI":"10.1109\/CVPR52688.2022.01079"},{"key":"4216_CR39","doi-asserted-by":"crossref","unstructured":"Panteleris, P., Argyros, A.: Pe-former: Pose estimation transformer, in: Pattern Recognition and Artificial Intelligence: Third International Conference, ICPRAI 2022, Paris, France, June 1\u20133, 2022, Proceedings, Part II, Springer, pp. 3\u201314 (2022)","DOI":"10.1007\/978-3-031-09282-4_1"},{"key":"4216_CR40","doi-asserted-by":"crossref","unstructured":"Zeng, W., Jin, S., Liu, W., Qian, C., Luo, P., Ouyang, W., Wang, X.: Not all tokens are equal: Human-centric visual analysis via token clustering transformer, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 11101\u201311111 (2022)","DOI":"10.1109\/CVPR52688.2022.01082"},{"key":"4216_CR41","doi-asserted-by":"crossref","unstructured":"Mao, W., Ge, Y., Shen, C., Tian, Z., Wang, X., Wang, Z., den Hengel, A.\u00a0v.: Poseur: Direct human pose regression with transformers, in: Computer Vision\u2013ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings, Part VI, Springer, 72\u201388 (2022)","DOI":"10.1007\/978-3-031-20068-7_5"},{"key":"4216_CR42","unstructured":"Xu, Y., Zhang, J., Zhang, Q., Tao, D.: Vitpose: Simple vision transformer baselines for human pose estimation, arXiv preprint arXiv:2204.12484 (2022)"},{"issue":"8","key":"4216_CR43","doi-asserted-by":"publisher","first-page":"3429","DOI":"10.1007\/s00371-023-02953-4","volume":"39","author":"S Li","year":"2023","unstructured":"Li, S., Dai, J., Chen, Z., Pan, J.: Lightweight pose estimation network with multi-scale receptive field. Vis. Comput. 39(8), 3429\u20133440 (2023). https:\/\/doi.org\/10.1007\/s00371-023-02953-4","journal-title":"Vis. Comput."},{"key":"4216_CR44","doi-asserted-by":"publisher","unstructured":"Zhang, J., Liu, H., Li, Y., Li, W., Ding, R.: Tkformer: Typed keypoints guided transformer for human parsing, in: 2023 7th Asian Conference on Artificial Intelligence Technology (ACAIT), 2023, pp. 1468\u20131477. https:\/\/doi.org\/10.1109\/ACAIT60137.2023.10528482","DOI":"10.1109\/ACAIT60137.2023.10528482"},{"key":"4216_CR45","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition, in: Proceedings of the IEEE conference on computer vision and pattern recognition, 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"4216_CR46","doi-asserted-by":"crossref","unstructured":"Xu, Y., Li, J., Ding, Y., Wei, H.-L.: A global to local double embedding method for multi-person pose estimation, in: Proceedings of the Asian Conference on Computer Vision, (2020)","DOI":"10.1007\/978-3-030-69541-5_6"}],"container-title":["The Visual Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-025-04216-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00371-025-04216-w","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-025-04216-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T13:00:40Z","timestamp":1772629240000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00371-025-04216-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,20]]},"references-count":46,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,1]]}},"alternative-id":["4216"],"URL":"https:\/\/doi.org\/10.1007\/s00371-025-04216-w","relation":{},"ISSN":["0178-2789","1432-2315"],"issn-type":[{"value":"0178-2789","type":"print"},{"value":"1432-2315","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,12,20]]},"assertion":[{"value":"6 November 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 November 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 December 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"86"}}