{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,2]],"date-time":"2025-11-02T08:44:45Z","timestamp":1762073085393,"version":"build-2065373602"},"publisher-location":"Cham","reference-count":40,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783031189067"},{"type":"electronic","value":"9783031189074"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-18907-4_51","type":"book-chapter","created":{"date-parts":[[2022,10,26]],"date-time":"2022-10-26T23:03:53Z","timestamp":1666825433000},"page":"660-673","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["KITPose: Keypoint-Interactive Transformer for\u00a0Animal Pose Estimation"],"prefix":"10.1007","author":[{"given":"Jiyong","family":"Rao","sequence":"first","affiliation":[]},{"given":"Tianyang","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Xiaoning","family":"Song","sequence":"additional","affiliation":[]},{"given":"Zhen-Hua","family":"Feng","sequence":"additional","affiliation":[]},{"given":"Xiao-Jun","family":"Wu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,10,27]]},"reference":[{"key":"51_CR1","unstructured":"Yu, H., Xu, Y., Zhang, J., Zhao, W., Guan, Z., Tao, D.: AP-10K: a benchmark for animal pose estimation in the wild. arXiv preprint arXiv:2108.12617 (2021)"},{"key":"51_CR2","doi-asserted-by":"crossref","unstructured":"Li, S., Li, J., Tang, H., Qian, R., Lin, W.: ATRW: a benchmark for amur tiger re-identification in the wild. In: Proceedings of the 28th ACM International Conference on Multimedia, pp. 2590\u20132598 (2020)","DOI":"10.1145\/3394171.3413569"},{"key":"51_CR3","doi-asserted-by":"crossref","unstructured":"Pereira, T.D., et al.: SLEAP: multi-animal pose tracking. bioRXiv (2020)","DOI":"10.1101\/2020.08.31.276246"},{"key":"51_CR4","doi-asserted-by":"publisher","unstructured":"Pereira, T.D., et al.: SLEAP: a deep learning system for multi-animal pose tracking. Nat. Methods 19, 486\u2013495 (2022). https:\/\/doi.org\/10.1038\/s41592-022-01426-1","DOI":"10.1038\/s41592-022-01426-1"},{"key":"51_CR5","doi-asserted-by":"publisher","unstructured":"Lauer, J., et al.: Multi-animal pose estimation, identification and tracking with DeepLabCut. Nat. Methods 19, 496\u2013504 (2022). https:\/\/doi.org\/10.1038\/s41592-022-01443-0","DOI":"10.1038\/s41592-022-01443-0"},{"key":"51_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"51_CR7","doi-asserted-by":"crossref","unstructured":"Andriluka, M., Pishchulin, L., Gehler, P., Schiele, B.: 2D human pose estimation: new benchmark and state of the art analysis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3686\u20133693 (2014)","DOI":"10.1109\/CVPR.2014.471"},{"key":"51_CR8","doi-asserted-by":"crossref","unstructured":"Wei, S.E., Ramakrishna, V., Kanade, T., Sheikh, Y.: Convolutional pose machines. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4724\u20134732 (2016)","DOI":"10.1109\/CVPR.2016.511"},{"key":"51_CR9","doi-asserted-by":"crossref","unstructured":"Cao, Z., Simon, T., Wei, S.E., Sheikh, Y.: Realtime multi-person 2D pose estimation using part affinity fields. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7291\u20137299 (2017)","DOI":"10.1109\/CVPR.2017.143"},{"key":"51_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"483","DOI":"10.1007\/978-3-319-46484-8_29","volume-title":"Computer Vision \u2013 ECCV 2016","author":"A Newell","year":"2016","unstructured":"Newell, A., Yang, K., Deng, J.: Stacked hourglass networks for human pose estimation. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9912, pp. 483\u2013499. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46484-8_29"},{"key":"51_CR11","doi-asserted-by":"crossref","unstructured":"Fang, H.S., Xie, S., Tai, Y.W., Lu, C.: RMPE: regional multi-person pose estimation. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2334\u20132343 (2017)","DOI":"10.1109\/ICCV.2017.256"},{"key":"51_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"472","DOI":"10.1007\/978-3-030-01231-1_29","volume-title":"Computer Vision \u2013 ECCV 2018","author":"B Xiao","year":"2018","unstructured":"Xiao, B., Wu, H., Wei, Y.: Simple baselines for human pose estimation and tracking. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11210, pp. 472\u2013487. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01231-1_29"},{"key":"51_CR13","doi-asserted-by":"crossref","unstructured":"Wang, J., et al.: Deep high-resolution representation learning for visual recognition. IEEE Trans. Pattern Anal. Mach. Intell. 43(10), 3349\u20133364 (2020)","DOI":"10.1109\/TPAMI.2020.2983686"},{"key":"51_CR14","doi-asserted-by":"crossref","unstructured":"Sun, K., Xiao, B., Liu, D., Wang, J.: Deep high-resolution representation learning for human pose estimation. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5693\u20135703 (2019)","DOI":"10.1109\/CVPR.2019.00584"},{"key":"51_CR15","doi-asserted-by":"crossref","unstructured":"Cheng, B., et al.: HigherHRNet: scale-aware representation learning for bottom-up human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5386\u20135395 (2020)","DOI":"10.1109\/CVPR42600.2020.00543"},{"key":"51_CR16","unstructured":"mmPose Contributor: OpenMMLab pose estimation toolbox and benchmark. https:\/\/github.com\/open-mmlab\/mmpose (2020)"},{"key":"51_CR17","doi-asserted-by":"crossref","unstructured":"Li, K., et al.: Pose recognition with cascade transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1944\u20131953 (2021)","DOI":"10.1109\/CVPR46437.2021.00198"},{"key":"51_CR18","doi-asserted-by":"crossref","unstructured":"Yang, S., Quan, Z., Nie, M., Yang, W.: TransPose: keypoint localization via transformer. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 11802\u201311812 (2021)","DOI":"10.1109\/ICCV48922.2021.01159"},{"key":"51_CR19","doi-asserted-by":"crossref","unstructured":"Li, Y., et al.: TokenPose: learning keypoint tokens for human pose estimation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 11313\u201311322 (2021)","DOI":"10.1109\/ICCV48922.2021.01112"},{"key":"51_CR20","unstructured":"Yuan, Y., et al.: HRFormer: high-Resolution Vision Transformer for Dense Predict. In: Advances in Neural Information Processing Systems 34 (2021)"},{"key":"51_CR21","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1007\/978-3-030-58452-8_13","volume-title":"Computer Vision \u2013 ECCV 2020","author":"N Carion","year":"2020","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12346, pp. 213\u2013229. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_13"},{"key":"51_CR22","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"51_CR23","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"51_CR24","doi-asserted-by":"crossref","unstructured":"Mathis, A., at al.: Pretraining boosts out-of-domain robustness for pose estimation. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 1859\u20131868 (2021)","DOI":"10.1109\/WACV48630.2021.00190"},{"key":"51_CR25","doi-asserted-by":"publisher","unstructured":"Graving, J.M., Chae, D., et al.: DeepPoseKit, a software toolkit for fast and robust animal pose estimation using deep learning. eLife 8, e47994 (2019). https:\/\/doi.org\/10.7554\/eLife.47994","DOI":"10.7554\/eLife.47994"},{"key":"51_CR26","doi-asserted-by":"crossref","unstructured":"Cao, J., Tang, H., Fang, H.S., Shen, X., Lu, C., Tai, Y.W.: Cross-domain adaptation for animal pose estimation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9498\u20139507 (2019)","DOI":"10.1109\/ICCV.2019.00959"},{"key":"51_CR27","doi-asserted-by":"crossref","unstructured":"Li, C., Lee, G.H.: From synthetic to real: unsupervised domain adaptation for animal pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1482\u20131491 (2021)","DOI":"10.1109\/CVPR46437.2021.00153"},{"key":"51_CR28","doi-asserted-by":"crossref","unstructured":"Labuguen, R., et al.: MacaquePose: a novel \u201cin the wild\u201d macaque monkey pose dataset for markerless motion capture. bioRxiv (2020)","DOI":"10.1101\/2020.07.30.229989"},{"issue":"1","key":"51_CR29","doi-asserted-by":"publisher","first-page":"117","DOI":"10.1038\/s41592-018-0234-5","volume":"16","author":"TD Pereira","year":"2019","unstructured":"Pereira, T.D., et al.: Fast animal pose estimation using deep neural networks. Nat. Methods 16(1), 117\u2013125 (2019)","journal-title":"Nat. Methods"},{"key":"51_CR30","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"51_CR31","unstructured":"Newell, A., Huang, Z., Deng, J.: Associative embedding: end-to-end learning for joint detection and grouping. In: Advances in Neural Information Processing systems 30 (2017)"},{"key":"51_CR32","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems 30 (2017)"},{"key":"51_CR33","doi-asserted-by":"crossref","unstructured":"Liu, Z., at al.: Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"51_CR34","doi-asserted-by":"crossref","unstructured":"Mu, J., Qiu, W., Hager, G.D., Yuille, A.L.: Learning from synthetic animals. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12386\u201312395 (2020)","DOI":"10.1109\/CVPR42600.2020.01240"},{"key":"51_CR35","doi-asserted-by":"crossref","unstructured":"Zhang, F., Zhu, X., Dai, H., Ye, M., Zhu, C.: Distribution-aware coordinate representation for human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7093\u20137102 (2020)","DOI":"10.1109\/CVPR42600.2020.00712"},{"key":"51_CR36","doi-asserted-by":"crossref","unstructured":"Geng, Z., et al.: Bottom-up human pose estimation via disentangled keypoint regression. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14676\u201314686 (2021)","DOI":"10.1109\/CVPR46437.2021.01444"},{"key":"51_CR37","doi-asserted-by":"crossref","unstructured":"Luo, Z., et al.: Rethinking the heatmap regression for bottom-up human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13264\u201313273 (2021)","DOI":"10.1109\/CVPR46437.2021.01306"},{"key":"51_CR38","doi-asserted-by":"crossref","unstructured":"Jin, L., et al.: Grouping by Center: predicting Centripetal Offsets for the bottom-up human pose estimation. IEEE Trans. Multimedia (2022)","DOI":"10.1109\/TMM.2022.3159111"},{"key":"51_CR39","doi-asserted-by":"crossref","unstructured":"Harding, E.J., Paul, E.S., Mendl, M.: Cognitive bias and affective state. Nature 427(6972), 312 (2004)","DOI":"10.1038\/427312a"},{"key":"51_CR40","doi-asserted-by":"crossref","unstructured":"Touvron, H., et al.: Training data-efficient image transformers & distillation through attention. In: International Conference on Machine Learning, pp. 10347\u201310357 (2021)","DOI":"10.1109\/ICCV48922.2021.00010"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-18907-4_51","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,6]],"date-time":"2024-10-06T16:33:18Z","timestamp":1728232398000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-18907-4_51"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031189067","9783031189074"],"references-count":40,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-18907-4_51","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"27 October 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Shenzhen","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 October 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17 October 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/en.prcv.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"microsoft","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"564","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"233","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"41% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.03","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.35","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}