{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,4]],"date-time":"2025-12-04T10:09:23Z","timestamp":1764842963395,"version":"3.40.3"},"publisher-location":"Cham","reference-count":51,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031728891"},{"type":"electronic","value":"9783031728907"}],"license":[{"start":{"date-parts":[[2024,12,7]],"date-time":"2024-12-07T00:00:00Z","timestamp":1733529600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,7]],"date-time":"2024-12-07T00:00:00Z","timestamp":1733529600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72890-7_13","type":"book-chapter","created":{"date-parts":[[2024,12,6]],"date-time":"2024-12-06T19:45:07Z","timestamp":1733514307000},"page":"213-230","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["GTPT: Group-Based Token Pruning Transformer for\u00a0Efficient Human Pose Estimation"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7159-2432","authenticated-orcid":false,"given":"Haonan","family":"Wang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9297-7729","authenticated-orcid":false,"given":"Jie","family":"Liu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6086-3559","authenticated-orcid":false,"given":"Jie","family":"Tang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1391-1762","authenticated-orcid":false,"given":"Gangshan","family":"Wu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0006-2136-3814","authenticated-orcid":false,"given":"Bo","family":"Xu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0006-1137-4771","authenticated-orcid":false,"given":"Yanbing","family":"Chou","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0001-2844-6296","authenticated-orcid":false,"given":"Yong","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,12,7]]},"reference":[{"key":"13_CR1","unstructured":"Bukschat, Y., Vetter, M.: Efficientpose: an efficient, accurate and scalable end-to-end 6D multi object pose estimation approach. arXiv preprint arXiv:2011.04307 (2020)"},{"issue":"01","key":"13_CR2","doi-asserted-by":"publisher","first-page":"172","DOI":"10.1109\/TPAMI.2019.2929257","volume":"43","author":"Z Cao","year":"2021","unstructured":"Cao, Z., Hidalgo, G., Simon, T., Wei, S.E., Sheikh, Y.: OpenPose: realtime multi-person 2D pose estimation using part affinity fields. IEEE Trans. Pattern Anal. Mach. Intell. 43(01), 172\u2013186 (2021)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"13_CR3","doi-asserted-by":"crossref","unstructured":"Cao, Z., Simon, T., Wei, S.E., Sheikh, Y.: Realtime multi-person 2D pose estimation using part affinity fields. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7291\u20137299 (2017)","DOI":"10.1109\/CVPR.2017.143"},{"key":"13_CR4","doi-asserted-by":"crossref","unstructured":"Chen, H., Feng, R., Wu, S., Xu, H., Zhou, F., Liu, Z.: 2D human pose estimation: a survey. arXiv preprint arXiv:2204.07370 (2022)","DOI":"10.1007\/s00530-022-01019-0"},{"key":"13_CR5","unstructured":"Cheng, B., Schwing, A., Kirillov, A.: Per-pixel classification is not all you need for semantic segmentation. Adv. Neural. Inf. Process. Syst. 34, 17864\u201317875 (2021)"},{"key":"13_CR6","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"13_CR7","doi-asserted-by":"crossref","unstructured":"Fang, H.S., et al.: Alphapose: whole-body regional multi-person pose estimation and tracking in real-time. IEEE Trans. Pattern Anal. Mach. Intell. (2022)","DOI":"10.1109\/TPAMI.2022.3222784"},{"key":"13_CR8","doi-asserted-by":"crossref","unstructured":"Garau, N., Bisagno, N., Br\u00f3dka, P., Conci, N.: DECA: deep viewpoint-equivariant human pose estimation using capsule autoencoders. arXiv preprint arXiv:2108.08557 (2021)","DOI":"10.1109\/ICCV48922.2021.01147"},{"key":"13_CR9","unstructured":"Hidalgo, G., et al.: Single-network whole-body pose estimation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6982\u20136991 (2019)"},{"key":"13_CR10","unstructured":"Hinton, G., Vinyals, O., Dean, J.: Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531 (2015)"},{"key":"13_CR11","doi-asserted-by":"crossref","unstructured":"Hu, J., Shen, L., Sun, G.: Squeeze-and-excitation networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7132\u20137141 (2018)","DOI":"10.1109\/CVPR.2018.00745"},{"key":"13_CR12","unstructured":"Jiang, T., et al.: RTMPose: real-time multi-person pose estimation based on MMPose. arXiv preprint arXiv:2303.07399 (2023)"},{"key":"13_CR13","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"196","DOI":"10.1007\/978-3-030-58545-7_12","volume-title":"Computer Vision \u2013 ECCV 2020","author":"S Jin","year":"2020","unstructured":"Jin, S., et al.: Whole-body human pose estimation in the wild. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12354, pp. 196\u2013214. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58545-7_12"},{"key":"13_CR14","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"89","DOI":"10.1007\/978-3-031-20068-7_6","volume-title":"ECCV 2022","author":"Y Li","year":"2022","unstructured":"Li, Y., et al.: SimCC: a simple coordinate classification perspective for human pose estimation. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13666, pp. 89\u2013106. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20068-7_6"},{"key":"13_CR15","doi-asserted-by":"crossref","unstructured":"Li, Y., et al.: TokenPose: learning keypoint tokens for human pose estimation. arXiv preprint arXiv:2104.03516 (2021)","DOI":"10.1109\/ICCV48922.2021.01112"},{"key":"13_CR16","doi-asserted-by":"crossref","unstructured":"Li, Z., Ye, J., Song, M., Huang, Y., Pan, Z.: Online knowledge distillation for efficient pose estimation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 11740\u201311750 (2021)","DOI":"10.1109\/ICCV48922.2021.01153"},{"key":"13_CR17","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"13_CR18","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"13_CR19","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"424","DOI":"10.1007\/978-3-031-20065-6_25","volume-title":"ECCV 2022","author":"H Ma","year":"2022","unstructured":"Ma, H., et al.: PPT: token-pruned pose transformer for monocular and multi-view human pose estimation. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13665, pp. 424\u2013442. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20065-6_25"},{"key":"13_CR20","doi-asserted-by":"crossref","unstructured":"Ma, N., Zhang, X., Zheng, H.T., Sun, J.: Shufflenet v2: practical guidelines for efficient CNN architecture design. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 116\u2013131 (2018)","DOI":"10.1007\/978-3-030-01264-9_8"},{"key":"13_CR21","unstructured":"Mao, W., Ge, Y., Shen, C., Tian, Z., Wang, X., Wang, Z.: TFPose: direct human pose estimation with transformers. arXiv preprint arXiv:2103.15320 (2021)"},{"key":"13_CR22","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"72","DOI":"10.1007\/978-3-031-20068-7_5","volume-title":"ECCV 2022 Part VI","author":"W Mao","year":"2022","unstructured":"Mao, W., et al.: Poseur: direct human pose regression with transformers. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022 Part VI. LNCS, vol. 13666, pp. 72\u201388. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20068-7_5"},{"key":"13_CR23","doi-asserted-by":"crossref","unstructured":"Neff, C., Sheth, A., Furgurson, S., Tabkhi, H.: EfficienthrNet: efficient scaling for lightweight high-resolution multi-person pose estimation. arXiv preprint arXiv:2007.08090 (2020)","DOI":"10.1007\/s11554-021-01132-9"},{"key":"13_CR24","unstructured":"Newell, A., Huang, Z., Deng, J.: Associative embedding: End-to-end learning for joint detection and grouping. In: Advances in Neural Information Processing Systems (2017)"},{"key":"13_CR25","doi-asserted-by":"crossref","unstructured":"Osokin, D.: Real-time 2D multi-person pose estimation on CPU: Lightweight openpose. arXiv preprint arXiv:1811.12004 (2018)","DOI":"10.5220\/0007555407440748"},{"key":"13_CR26","doi-asserted-by":"crossref","unstructured":"Shen, X., et al.: Towards fast and accurate multi-person pose estimation on mobile devices. arXiv preprint arXiv:2106.15304 (2021)","DOI":"10.24963\/ijcai.2021\/715"},{"key":"13_CR27","doi-asserted-by":"crossref","unstructured":"Sun, K., Xiao, B., Liu, D., Wang, J.: Deep high-resolution representation learning for human pose estimation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5693\u20135703 (2019)","DOI":"10.1109\/CVPR.2019.00584"},{"key":"13_CR28","unstructured":"Tompson, J.J., Jain, A., LeCun, Y., Bregler, C.: Joint training of a convolutional network and a graphical model for human pose estimation. In: Advances in Neural Information Processing Systems , vol. 27 (2014)"},{"key":"13_CR29","doi-asserted-by":"crossref","unstructured":"Toshev, A., Szegedy, C.: DeepPose: human pose estimation via deep neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1653\u20131660 (2014)","DOI":"10.1109\/CVPR.2014.214"},{"key":"13_CR30","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"13_CR31","doi-asserted-by":"crossref","unstructured":"Vats, A., Anastasiu, D.C.: Key point-based driver activity recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3274\u20133281 (2022)","DOI":"10.1109\/CVPRW56347.2022.00370"},{"key":"13_CR32","doi-asserted-by":"crossref","unstructured":"Wang, H., Liu, J., Tang, J., Wu, G.: Lightweight super-resolution head for human pose estimation. arXiv preprint arXiv:2307.16765 (2023)","DOI":"10.1145\/3581783.3612236"},{"key":"13_CR33","doi-asserted-by":"crossref","unstructured":"Wang, M., Tighe, J., Modolo, D.: Combining detection and tracking for human pose estimation in videos. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11088\u201311096 (2020)","DOI":"10.1109\/CVPR42600.2020.01110"},{"key":"13_CR34","doi-asserted-by":"crossref","unstructured":"Wang, W., et al.: Pyramid vision transformer: a versatile backbone for dense prediction without convolutions. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 568\u2013578 (2021)","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"13_CR35","doi-asserted-by":"crossref","unstructured":"Wang, Y., Li, M., Cai, H., Chen, W.M., Han, S.: Lite pose: efficient architecture design for 2D human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13126\u201313136 (2022)","DOI":"10.1109\/CVPR52688.2022.01278"},{"key":"13_CR36","doi-asserted-by":"crossref","unstructured":"Wehrbein, T., Rudolph, M., Rosenhahn, B., Wandt, B.: Probabilistic monocular 3d human pose estimation with normalizing flows. arXiv preprint arXiv:2107.13788 (2021)","DOI":"10.1109\/ICCV48922.2021.01101"},{"key":"13_CR37","doi-asserted-by":"crossref","unstructured":"Xiao, B., Wu, H., Wei, Y.: Simple baselines for human pose estimation and tracking. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 466\u2013481 (2018)","DOI":"10.1007\/978-3-030-01231-1_29"},{"issue":"4","key":"13_CR38","first-page":"5296","volume":"45","author":"L Xu","year":"2022","unstructured":"Xu, L., et al.: ZoomNas: searching for whole-body human pose estimation in the wild. IEEE Trans. Pattern Anal. Mach. Intell. 45(4), 5296\u20135313 (2022)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"13_CR39","unstructured":"Xu, Y., Zhang, J., Zhang, Q., Tao, D.: ViTPose: simple vision transformer baselines for human pose estimation. arXiv preprint arXiv:2204.12484 (2022)"},{"key":"13_CR40","unstructured":"Xu, Y., Zhang, J., Zhang, Q., Tao, D.: Vitpose+: vision transformer foundation model for generic body pose estimation. arXiv preprint arXiv:2212.04246 (2022)"},{"key":"13_CR41","doi-asserted-by":"publisher","first-page":"107948","DOI":"10.1016\/j.knosys.2021.107948","volume":"239","author":"SK Yadav","year":"2022","unstructured":"Yadav, S.K., Luthra, A., Tiwari, K., Pandey, H.M., Akbar, S.A.: ARFDNet: an efficient activity recognition & fall detection system using latent feature pooling. Knowl. Based Syst. 239, 107948 (2022)","journal-title":"Knowl. Based Syst."},{"key":"13_CR42","doi-asserted-by":"crossref","unstructured":"Yang, S., Quan, Z., Nie, M., Yang, W.: Transpose: keypoint localization via transformer. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 11802\u201311812 (2021)","DOI":"10.1109\/ICCV48922.2021.01159"},{"key":"13_CR43","doi-asserted-by":"crossref","unstructured":"Yang, Z., Zeng, A., Yuan, C., Li, Y.: Effective whole-body pose estimation with two-stages distillation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4210\u20134220 (2023)","DOI":"10.1109\/ICCVW60793.2023.00455"},{"key":"13_CR44","doi-asserted-by":"crossref","unstructured":"Ye, S., et al.: DistilPose: tokenized pose regression with heatmap distillation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2163\u20132172 (2023)","DOI":"10.1109\/CVPR52729.2023.00215"},{"key":"13_CR45","doi-asserted-by":"crossref","unstructured":"Yu, C., et al.: Lite-HRNet: a lightweight high-resolution network. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10440\u201310450 (2021)","DOI":"10.1109\/CVPR46437.2021.01030"},{"key":"13_CR46","first-page":"7281","volume":"34","author":"Y Yuan","year":"2021","unstructured":"Yuan, Y., et al.: HRFormer: high-resolution vision transformer for dense predict. Adv. Neural. Inf. Process. Syst. 34, 7281\u20137293 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"13_CR47","doi-asserted-by":"crossref","unstructured":"Zeng, A., Sun, X., Yang, L., Zhao, N., Liu, M., Xu, Q.: Learning skeletal graph neural networks for hard 3D pose estimation. arXiv preprint arXiv:2108.07181 (2021)","DOI":"10.1109\/ICCV48922.2021.01124"},{"key":"13_CR48","doi-asserted-by":"crossref","unstructured":"Zeng, W., et al.: Not all tokens are equal: human-centric visual analysis via token clustering transformer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11101\u201311111 (2022)","DOI":"10.1109\/CVPR52688.2022.01082"},{"key":"13_CR49","doi-asserted-by":"crossref","unstructured":"Zhang, F., Zhu, X., Ye, M.: Fast human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3517\u20133526 (2019)","DOI":"10.1109\/CVPR.2019.00363"},{"key":"13_CR50","unstructured":"Zhu, X., Su, W., Lu, L., Li, B., Wang, X., Dai, J.: Deformable DETR: deformable transformers for end-to-end object detection. arXiv preprint arXiv:2010.04159 (2020)"},{"key":"13_CR51","doi-asserted-by":"crossref","unstructured":"Zou, S., et al.: Eventhpe: event-based 3D human pose and shape estimation. arXiv preprint arXiv:2108.06819 (2021)","DOI":"10.1109\/ICCV48922.2021.01081"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72890-7_13","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,6]],"date-time":"2024-12-06T20:05:50Z","timestamp":1733515550000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72890-7_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,7]]},"ISBN":["9783031728891","9783031728907"],"references-count":51,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72890-7_13","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,12,7]]},"assertion":[{"value":"7 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}