{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T20:54:29Z","timestamp":1772830469935,"version":"3.50.1"},"reference-count":48,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2024,8,22]],"date-time":"2024-08-22T00:00:00Z","timestamp":1724284800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,8,22]],"date-time":"2024-08-22T00:00:00Z","timestamp":1724284800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61771299"],"award-info":[{"award-number":["61771299"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61771299"],"award-info":[{"award-number":["61771299"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61771299"],"award-info":[{"award-number":["61771299"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Machine Vision and Applications"],"published-print":{"date-parts":[[2024,9]]},"DOI":"10.1007\/s00138-024-01602-7","type":"journal-article","created":{"date-parts":[[2024,8,23]],"date-time":"2024-08-23T14:24:34Z","timestamp":1724423074000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Enhanced keypoint information and pose-weighted re-ID features for multi-person pose estimation and tracking"],"prefix":"10.1007","volume":"35","author":[{"given":"Xiangyang","family":"Wang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tao","family":"Pei","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rui","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,8,22]]},"reference":[{"key":"1602_CR1","doi-asserted-by":"crossref","unstructured":"Lin, T., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., Zitnick, C.: Microsoft coco: Common objects in context. In: Computer Vision-ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part V 13, pp. 740\u2013755 (2014)","DOI":"10.1007\/978-3-319-10602-1_48"},{"issue":"01","key":"1602_CR2","doi-asserted-by":"publisher","first-page":"172","DOI":"10.1109\/TPAMI.2019.2929257","volume":"43","author":"Z Cao","year":"2021","unstructured":"Cao, Z., Simon, T., Wei, S.-E., Sheikh, Y.: Openpose: realtime multi-person 2d pose estimation using part affinity fields. IEEE Trans. Pattern Anal. Mach. Intell. 43(01), 172\u2013186 (2021)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"1","key":"1602_CR3","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3603618","volume":"56","author":"C Zheng","year":"2023","unstructured":"Zheng, C., Wu, W., Chen, C., Yang, T., Zhu, S., Shen, J., Kehtarnavaz, N., Shah, M.: Deep learning-based human pose estimation: a survey. ACM Comput. Surv. 56(1), 1\u201337 (2023)","journal-title":"ACM Comput. Surv."},{"key":"1602_CR4","doi-asserted-by":"crossref","unstructured":"Iqbal, U., Milan, A., Gall, J.: Posetrack: Joint multi-person pose estimation and tracking. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2011\u20132020 (2018)","DOI":"10.1109\/CVPR.2017.495"},{"key":"1602_CR5","doi-asserted-by":"crossref","unstructured":"Xiao, B., Wu, H., Wei, Y.: Simple baselines for human pose estimation and tracking. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 466\u2013481 (2018)","DOI":"10.1007\/978-3-030-01231-1_29"},{"key":"1602_CR6","doi-asserted-by":"crossref","unstructured":"Liu, M., Yuan, J.: Recognizing human actions as the evolution of pose estimation maps. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1159\u20131168 (2018)","DOI":"10.1109\/CVPR.2018.00127"},{"key":"1602_CR7","doi-asserted-by":"crossref","unstructured":"Black, D., Salcudean, S.: Robust object pose tracking for augmented reality guidance and teleoperation. Authorea Preprints (2023)","DOI":"10.36227\/techrxiv.24045459"},{"key":"1602_CR8","doi-asserted-by":"crossref","unstructured":"Wang, M., Tighe, J., Modolo, D.: Combining detection and tracking for human pose estimation in videos. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11088\u201311096 (2020)","DOI":"10.1109\/CVPR42600.2020.01110"},{"key":"1602_CR9","doi-asserted-by":"crossref","unstructured":"Zhou, C., Ren, Z., Hua, G.: Temporal keypoint matching and refinement network for pose estimation and tracking. In: Computer Vision-ECCV 2020: 16th European Conference, Glasgow, UK, August 23-28, 2020, Proceedings, Part XXII 16, pp. 680\u2013695 (2020)","DOI":"10.1007\/978-3-030-58542-6_41"},{"key":"1602_CR10","doi-asserted-by":"crossref","unstructured":"Redmon, J., Divvala, S., Girshick, R., Farhadi, A.: You only look once: Unified, real-time object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 779\u2013788 (2016)","DOI":"10.1109\/CVPR.2016.91"},{"key":"1602_CR11","unstructured":"Wang, C.-Y., Yeh, I.-H., Liao, H.-Y.M.: You only learn one representation: Unified network for multiple tasks. arXiv preprint arXiv:2105.04206 (2021)"},{"key":"1602_CR12","doi-asserted-by":"crossref","unstructured":"Seidenschwarz, J., Bras\u00f3, G., Serrano, V.C., Elezi, I., Leal-Taix\u00e9, L.: Simple cues lead to a strong multi-object tracker. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13813\u201313823 (2023)","DOI":"10.1109\/CVPR52729.2023.01327"},{"key":"1602_CR13","unstructured":"Zhang, J., Zhu, Z., Zou, W., Li, P., Li, Y., Su, H., Huang, G.: Fastpose: Towards real-time pose estimation and tracking via scale-normalized multi-task networks. arXiv preprint arXiv:1908.05593 (2019)"},{"key":"1602_CR14","doi-asserted-by":"crossref","unstructured":"Rafi, U., Doering, A., Leibe, B., Gall, J.: Self-supervised keypoint correspondences for multi-person pose estimation and tracking in videos. In: Computer Vision-ECCV 2020: 16th European Conference, Glasgow, UK, August 23-28, 2020, Proceedings, Part XX 16, pp. 36\u201352 (2020)","DOI":"10.1007\/978-3-030-58565-5_3"},{"key":"1602_CR15","doi-asserted-by":"crossref","unstructured":"Wojke, N., Bewley, A., Paulus, D.: Simple online and realtime tracking with a deep association metric. In: 2017 IEEE International Conference on Image Processing (ICIP), pp. 3645\u20133649 (2017)","DOI":"10.1109\/ICIP.2017.8296962"},{"key":"1602_CR16","doi-asserted-by":"crossref","unstructured":"Luo, Z., Wang, Z., Huang, Y., Wang, L., Tan, T., Zhou, E.: Rethinking the heatmap regression for bottom-up human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13264\u201313273 (2021)","DOI":"10.1109\/CVPR46437.2021.01306"},{"key":"1602_CR17","doi-asserted-by":"crossref","unstructured":"Geng, Z., Sun, K., Xiao, B., Zhang, Z., Wang, J.: Bottom-up human pose estimation via disentangled keypoint regression. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14676\u201314686 (2021)","DOI":"10.1109\/CVPR46437.2021.01444"},{"issue":"8","key":"1602_CR18","doi-asserted-by":"publisher","first-page":"13498","DOI":"10.1109\/TITS.2021.3124981","volume":"23","author":"S Kreiss","year":"2022","unstructured":"Kreiss, S., Bertoni, L., Alahi, A.: Openpifpaf: Composite fields for semantic keypoint detection and spatio-temporal association. IEEE Trans. Intell. Transp. Syst. 23(8), 13498\u201313511 (2022)","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"1602_CR19","doi-asserted-by":"crossref","unstructured":"Sun, K., Xiao, B., Liu, D., Wang, J.: Deep high-resolution representation learning for human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5693\u20135703 (2019)","DOI":"10.1109\/CVPR.2019.00584"},{"key":"1602_CR20","doi-asserted-by":"crossref","unstructured":"Ning, G., Pei, J., Huang, H.: Lighttrack: A generic framework for online top-down human pose tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops, pp. 1034\u20131035 (2020)","DOI":"10.1109\/CVPRW50498.2020.00525"},{"key":"1602_CR21","doi-asserted-by":"crossref","unstructured":"Liu, Z., Chen, H., Feng, R., Wu, S., Ji, S., Yang, B., Wang, X.: Deep dual consecutive network for human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 525\u2013534 (2021)","DOI":"10.1109\/CVPR46437.2021.00059"},{"key":"1602_CR22","doi-asserted-by":"crossref","unstructured":"Fang, H.-S., Li, J., Tang, H., Xu, C., Zhu, H., Xiu, Y., Li, Y.-L., Lu, C.: Alphapose: Whole-body regional multi-person pose estimation and tracking in real-time. IEEE Transactions on Pattern Analysis and Machine Intelligence (2022)","DOI":"10.1109\/TPAMI.2022.3222784"},{"key":"1602_CR23","doi-asserted-by":"crossref","unstructured":"Feng, R., Gao, Y., Tse, T.H.E., Ma, X., Chang, H.J.: Diffpose: Spatiotemporal diffusion model for video-based human pose estimation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 14861\u201314872 (2023)","DOI":"10.1109\/ICCV51070.2023.01365"},{"key":"1602_CR24","doi-asserted-by":"publisher","first-page":"3973","DOI":"10.1109\/TIP.2022.3177959","volume":"31","author":"Y Dang","year":"2022","unstructured":"Dang, Y., Yin, J., Zhang, S.: Relation-based associative joint location for human pose estimation in videos. IEEE Trans. Image Process. 31, 3973\u20133986 (2022)","journal-title":"IEEE Trans. Image Process."},{"key":"1602_CR25","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"1602_CR26","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"1602_CR27","doi-asserted-by":"crossref","unstructured":"Yang, S., Quan, Z., Nie, M., Yang, W.: Transpose: Keypoint localization via transformer. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 11802\u201311812 (2021)","DOI":"10.1109\/ICCV48922.2021.01159"},{"key":"1602_CR28","doi-asserted-by":"crossref","unstructured":"Li, Y., Zhang, S., Wang, Z., Yang, S., Yang, W., Xia, S.-T., Zhou, E.: Tokenpose: Learning keypoint tokens for human pose estimation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 11313\u201311322 (2021)","DOI":"10.1109\/ICCV48922.2021.01112"},{"key":"1602_CR29","unstructured":"Yuan, Y., Rao, F., Lang, H., Lin, W., Zhang, C., Chen, X., Wang, J.: Hrformer: High-resolution transformer for dense prediction. arXiv preprint arXiv:2110.0940819 (2021)"},{"key":"1602_CR30","first-page":"38571","volume":"35","author":"Y Xu","year":"2022","unstructured":"Xu, Y., Zhang, J., Zhang, Q., Tao, D.: Vitpose: Simple vision transformer baselines for human pose estimation. Adv. Neural. Inf. Process. Syst. 35, 38571\u201338584 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1602_CR31","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S.: An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"1602_CR32","doi-asserted-by":"crossref","unstructured":"Raaj, Y., Idrees, H., Hidalgo, G., Sheikh, Y.: Efficient online multi-person 2d pose tracking with recurrent spatio-temporal affinity fields. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4620\u20134628 (2019)","DOI":"10.1109\/CVPR.2019.00475"},{"key":"1602_CR33","doi-asserted-by":"crossref","unstructured":"Jin, S., Liu, W., Ouyang, W., Qian, C.: Multi-person articulated tracking with spatial and temporal embeddings. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5664\u20135673 (2019)","DOI":"10.1109\/CVPR.2019.00581"},{"key":"1602_CR34","doi-asserted-by":"crossref","unstructured":"Doering, A., Chen, D., Zhang, S., Schiele, B., Gall, J.: Posetrack21: A dataset for person search, multi-object tracking and multi-person pose tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 20963\u201320972 (2022)","DOI":"10.1109\/CVPR52688.2022.02029"},{"key":"1602_CR35","doi-asserted-by":"crossref","unstructured":"Yang, Y., Ren, Z., Li, H., Zhou, C., Wang, X., Hua, G.: Learning dynamics via graph neural networks for human pose estimation and tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8074\u20138084 (2021)","DOI":"10.1109\/CVPR46437.2021.00798"},{"key":"1602_CR36","unstructured":"Doering, A., Iqbal, U., Gall, J.: Joint flow: Temporal flow fields for multi person tracking. arXiv preprint arXiv:1805.04596 (2018)"},{"key":"1602_CR37","doi-asserted-by":"crossref","unstructured":"Hwang, J., Lee, J., Park, S., Kwak, N.: Pose estimator and tracker using temporal flow maps for limbs. In: 2019 International Joint Conference on Neural Networks (IJCNN), pp. 1\u20138 (2019). IEEE","DOI":"10.1109\/IJCNN.2019.8851734"},{"key":"1602_CR38","doi-asserted-by":"crossref","unstructured":"Fu, Z., Zuo, W., Hu, Z., Liu, Q., Wang, Y.: Improving multi-person pose tracking with a confidence network. IEEE Transactions on Multimedia (2023)","DOI":"10.1109\/TMM.2023.3330532"},{"key":"1602_CR39","doi-asserted-by":"crossref","unstructured":"Doering, A., Gall, J.: A gated attention transformer for multi-person pose tracking. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3189\u20133198 (2023)","DOI":"10.1109\/ICCVW60793.2023.00343"},{"issue":"3","key":"1602_CR40","first-page":"1","volume":"1341","author":"D Erhan","year":"2009","unstructured":"Erhan, D., Bengio, Y., Courville, A., Vincent, P.: Visualizing higher-layer features of a deep network. Univ. Montreal 1341(3), 1 (2009)","journal-title":"Univ. Montreal"},{"key":"1602_CR41","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Goyal, P., Girshick, R., He, K., Doll\u00e1r, P.: Focal loss for dense object detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2980\u20132988 (2017)","DOI":"10.1109\/ICCV.2017.324"},{"key":"1602_CR42","doi-asserted-by":"crossref","unstructured":"Bewley, A., Ge, Z., Ott, L., Ramos, F., Upcroft, B.: Simple online and realtime tracking. In: 2016 IEEE International Conference on Image Processing (ICIP), pp. 3464\u20133468 (2016)","DOI":"10.1109\/ICIP.2016.7533003"},{"key":"1602_CR43","doi-asserted-by":"crossref","unstructured":"Andriluka, M., Iqbal, U., Insafutdinov, E., Pishchulin, L., Milan, A., Gall, J., Schiele, B.: Posetrack: A benchmark for human pose estimation and tracking. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5167\u20135176 (2018)","DOI":"10.1109\/CVPR.2018.00542"},{"key":"1602_CR44","unstructured":"Bertasius, G., Feichtenhofer, C., Tran, D., Shi, J., Torresani, L.: Learning temporal pose estimation from sparsely-labeled videos. Adv. Neural Inf. Process. Syst. 32 (2019)"},{"key":"1602_CR45","doi-asserted-by":"crossref","unstructured":"Girdhar, R., Gkioxari, G., Torresani, L., Paluri, M., Tran, D.: Detect-and-track: Efficient pose estimation in videos. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 350\u2013359 (2018)","DOI":"10.1109\/CVPR.2018.00044"},{"key":"1602_CR46","doi-asserted-by":"crossref","unstructured":"Guo, H., Tang, T., Luo, G., Chen, R., Lu, Y., Wen, L.: Multi-domain pose network for multi-person pose estimation and tracking. In: Proceedings of the European Conference on Computer Vision (ECCV) Workshops, pp. 0\u20130 (2020)","DOI":"10.1007\/978-3-030-11012-3_17"},{"key":"1602_CR47","unstructured":"Xiu, Y., Li, J., Wang, H., Fang, Y., Lu, C.: Pose flow: Efficient online pose tracking. arXiv preprint arXiv:1802.00977 (2018)"},{"key":"1602_CR48","doi-asserted-by":"publisher","first-page":"161","DOI":"10.1109\/TMM.2020.2980194","volume":"23","author":"Q Bao","year":"2020","unstructured":"Bao, Q., Liu, W., Cheng, Y., Zhou, B., Mei, T.: Pose-guided tracking-by-detection: Robust multi-person pose tracking. IEEE Trans. Multimedia 23, 161\u2013175 (2020)","journal-title":"IEEE Trans. Multimedia"}],"container-title":["Machine Vision and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-024-01602-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00138-024-01602-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-024-01602-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,11]],"date-time":"2024-09-11T04:07:31Z","timestamp":1726027651000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00138-024-01602-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,22]]},"references-count":48,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2024,9]]}},"alternative-id":["1602"],"URL":"https:\/\/doi.org\/10.1007\/s00138-024-01602-7","relation":{},"ISSN":["0932-8092","1432-1769"],"issn-type":[{"value":"0932-8092","type":"print"},{"value":"1432-1769","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,8,22]]},"assertion":[{"value":"30 January 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 July 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 August 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 August 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"119"}}