{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,25]],"date-time":"2025-05-25T18:10:04Z","timestamp":1748196604149,"version":"3.41.0"},"publisher-location":"Cham","reference-count":30,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031915741","type":"print"},{"value":"9783031915758","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-91575-8_21","type":"book-chapter","created":{"date-parts":[[2025,5,25]],"date-time":"2025-05-25T17:57:07Z","timestamp":1748195827000},"page":"343-358","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Boosting Pose Estimators via\u00a0Cross-Representation Distillation"],"prefix":"10.1007","author":[{"given":"Kang","family":"Liu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhendong","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jingyun","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jun","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shaoming","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chun","family":"Yuan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rizen","family":"Guo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,5,12]]},"reference":[{"key":"21_CR1","unstructured":"Cao, W., Zhang, Y., Gao, J., Cheng, A., Cheng, K., Cheng, J.: Pkd: General distillation framework for object detectors via pearson correlation coefficient. arXiv preprint arXiv:2207.02039 (2022)"},{"key":"21_CR2","doi-asserted-by":"crossref","unstructured":"Cao, X., Yan, W.Q.: Pose estimation for swimmers in video surveillance. Multimedia Tools and Applications, pp. 1\u201316 (2023)","DOI":"10.1007\/s11042-023-16618-w"},{"key":"21_CR3","unstructured":"Contributors, M.: Openmmlab pose estimation toolbox and benchmark. https:\/\/github.com\/open-mmlab\/mmpose (2020)"},{"key":"21_CR4","doi-asserted-by":"crossref","unstructured":"Gui, L.Y., Wang, Y.X., Liang, X., Moura, J.M.: Adversarial geometry-aware human motion prediction. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 786\u2013803 (2018)","DOI":"10.1007\/978-3-030-01225-0_48"},{"key":"21_CR5","unstructured":"Hinton, G., Vinyals, O., Dean, J.: Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531 (2015)"},{"key":"21_CR6","doi-asserted-by":"crossref","unstructured":"Huang, Z., Chan, K.C., Jiang, Y., Liu, Z.: Collaborative diffusion for multi-modal face generation and editing. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6080\u20136090 (2023)","DOI":"10.1109\/CVPR52729.2023.00589"},{"key":"21_CR7","unstructured":"Jiang, T., et al.: Rtmpose: Real-time multi-person pose estimation based on mmpose. arXiv preprint arXiv:2303.07399 (2023)"},{"key":"21_CR8","unstructured":"Jin, P., Wu, Y., Fan, Y., Sun, Z., Yang, W., Yuan, L.: Act as you wish: fine-grained control of motion diffusion model with hierarchical semantic graphs. Adv. Neural Inform. Process. Syst. 36 (2024)"},{"key":"21_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"196","DOI":"10.1007\/978-3-030-58545-7_12","volume-title":"Computer Vision \u2013 ECCV 2020","author":"S Jin","year":"2020","unstructured":"Jin, S., et al.: Whole-body human pose estimation in the wild. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12354, pp. 196\u2013214. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58545-7_12"},{"key":"21_CR10","doi-asserted-by":"crossref","unstructured":"Li, Y., et al.: Simcc: a simple coordinate classification perspective for human pose estimation. In: European Conference on Computer Vision, pp. 89\u2013106. Springer (2022)","DOI":"10.1007\/978-3-031-20068-7_6"},{"key":"21_CR11","doi-asserted-by":"crossref","unstructured":"Li, Z., Ye, J., Song, M., Huang, Y., Pan, Z.: Online knowledge distillation for efficient pose estimation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 11740\u201311750 (2021)","DOI":"10.1109\/ICCV48922.2021.01153"},{"key":"21_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"21_CR13","doi-asserted-by":"crossref","unstructured":"Liu, Z., Wang, Y., Chu, X.: A simple and generic framework for feature distillation via channel-wise transformation. arXiv preprint arXiv:2303.13212 (2023)","DOI":"10.1109\/ICCVW60793.2023.00121"},{"key":"21_CR14","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"483","DOI":"10.1007\/978-3-319-46484-8_29","volume-title":"Computer Vision \u2013 ECCV 2016","author":"A Newell","year":"2016","unstructured":"Newell, A., Yang, K., Deng, J.: Stacked hourglass networks for human pose estimation. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9912, pp. 483\u2013499. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46484-8_29"},{"key":"21_CR15","unstructured":"Romero, A., Ballas, N., Kahou, S.E., Chassang, A., Gatta, C., Bengio, Y.: Fitnets: Hints for thin deep nets. arXiv preprint arXiv:1412.6550 (2014)"},{"key":"21_CR16","doi-asserted-by":"crossref","unstructured":"Stypu\u0142kowski, M., Vougioukas, K., He, S., Zi\u0119ba, M., Petridis, S., Pantic, M.: Diffused heads: Diffusion models beat gans on talking-face generation. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 5091\u20135100 (2024)","DOI":"10.1109\/WACV57701.2024.00502"},{"key":"21_CR17","doi-asserted-by":"crossref","unstructured":"Sun, K., Xiao, B., Liu, D., Wang, J.: Deep high-resolution representation learning for human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5693\u20135703 (2019)","DOI":"10.1109\/CVPR.2019.00584"},{"key":"21_CR18","doi-asserted-by":"crossref","unstructured":"Toshev, A., Szegedy, C.: Deeppose: human pose estimation via deep neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1653\u20131660 (2014)","DOI":"10.1109\/CVPR.2014.214"},{"key":"21_CR19","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TIP.2023.3334954","volume":"33","author":"X Wang","year":"2023","unstructured":"Wang, X., Zhang, W., Wang, C., Gao, Y., Liu, M.: Dynamic dense graph convolutional network for skeleton-based human motion prediction. IEEE Trans. Image Process. 33, 1\u201315 (2023)","journal-title":"IEEE Trans. Image Process."},{"key":"21_CR20","doi-asserted-by":"crossref","unstructured":"Xiao, B., Wu, H., Wei, Y.: Simple baselines for human pose estimation and tracking. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 466\u2013481 (2018)","DOI":"10.1007\/978-3-030-01231-1_29"},{"key":"21_CR21","doi-asserted-by":"crossref","unstructured":"Xing, Z., Dai, Q., Hu, H., Chen, J., Wu, Z., Jiang, Y.G.: Svformer: semi-supervised video transformer for action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18816\u201318826 (2023)","DOI":"10.1109\/CVPR52729.2023.01804"},{"key":"21_CR22","unstructured":"Yang, J., Martinez, B., Bulat, A., Tzimiropoulos, G.: Knowledge distillation via softmax regression representation learning. In: International Conference on Learning Representations (2020)"},{"key":"21_CR23","doi-asserted-by":"crossref","unstructured":"Yang, Z., et al.: Focal and global knowledge distillation for detectors. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4643\u20134652 (2022)","DOI":"10.1109\/CVPR52688.2022.00460"},{"key":"21_CR24","doi-asserted-by":"crossref","unstructured":"Yang, Z., Li, Z., Shao, M., Shi, D., Yuan, Z., Yuan, C.: Masked generative distillation. In: Computer Vision\u2013ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings, Part XI, pp. 53\u201369. Springer (2022)","DOI":"10.1007\/978-3-031-20083-0_4"},{"key":"21_CR25","doi-asserted-by":"crossref","unstructured":"Yang, Z., Zeng, A., Li, Z., Zhang, T., Yuan, C., Li, Y.: From knowledge distillation to self-knowledge distillation: a unified approach with normalized loss and customized soft labels. arXiv preprint arXiv:2303.13005 (2023)","DOI":"10.1109\/ICCV51070.2023.01576"},{"key":"21_CR26","doi-asserted-by":"crossref","unstructured":"Yang, Z., Zeng, A., Yuan, C., Li, Y.: Effective whole-body pose estimation with two-stages distillation. arXiv preprint arXiv:2307.15880 (2023)","DOI":"10.1109\/ICCVW60793.2023.00455"},{"key":"21_CR27","doi-asserted-by":"crossref","unstructured":"Zhang, F., Zhu, X., Ye, M.: Fast human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3517\u20133526 (2019)","DOI":"10.1109\/CVPR.2019.00363"},{"key":"21_CR28","doi-asserted-by":"crossref","unstructured":"Zhao, B., Cui, Q., Song, R., Qiu, Y., Liang, J.: Decoupled knowledge distillation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11953\u201311962 (2022)","DOI":"10.1109\/CVPR52688.2022.01165"},{"key":"21_CR29","unstructured":"Zhao, S., et al.: Uni-controlnet: All-in-one control to text-to-image diffusion models. arXiv preprint arXiv:2305.16322 (2023)"},{"key":"21_CR30","doi-asserted-by":"crossref","unstructured":"Zhu, W., Ma, X., Liu, Z., Liu, L., Wu, W., Wang, Y.: Motionbert: a unified perspective on learning human motion representations. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 15085\u201315099 (2023)","DOI":"10.1109\/ICCV51070.2023.01385"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-91575-8_21","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,25]],"date-time":"2025-05-25T17:57:18Z","timestamp":1748195838000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-91575-8_21"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031915741","9783031915758"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-91575-8_21","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"12 May 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}