{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T02:43:16Z","timestamp":1768272196223,"version":"3.49.0"},"publisher-location":"Singapore","reference-count":35,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819556755","type":"print"},{"value":"9789819556762","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-5676-2_24","type":"book-chapter","created":{"date-parts":[[2026,1,12]],"date-time":"2026-01-12T20:32:05Z","timestamp":1768249925000},"page":"354-368","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Deformable Epipolar Transformer for\u00a0Robust 3D Human Pose Estimation"],"prefix":"10.1007","author":[{"given":"Yang","family":"Gao","sequence":"first","affiliation":[]},{"given":"Xiaoqi","family":"An","sequence":"additional","affiliation":[]},{"given":"Di","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Fei","family":"Gao","sequence":"additional","affiliation":[]},{"given":"Lin","family":"Zhao","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,1,13]]},"reference":[{"issue":"9\/10","key":"24_CR1","doi-asserted-by":"publisher","first-page":"1333","DOI":"10.1108\/k.2001.30.9_10.1333.2","volume":"30","author":"AM Andrew","year":"2001","unstructured":"Andrew, A.M.: Multiple view geometry in computer vision. Kybernetes 30(9\/10), 1333\u20131341 (2001)","journal-title":"Kybernetes"},{"key":"24_CR2","doi-asserted-by":"crossref","unstructured":"Boukhayma, A., Bem, R.d., Torr, P.H.: 3D hand shape and pose from images in the wild. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10843\u201310852 (2019)","DOI":"10.1109\/CVPR.2019.01110"},{"key":"24_CR3","doi-asserted-by":"crossref","unstructured":"Chen, Y., Ma, H., Wang, J., Wu, J., Wu, X., Xie, X.: PD-Net: quantitative motor function evaluation for Parkinson\u2019s disease via automated hand gesture analysis. In: Proceedings of the 27th ACM SIGKDD Conference on Knowledge Discovery & Data Mining, pp. 2683\u20132691 (2021)","DOI":"10.1145\/3447548.3467130"},{"key":"24_CR4","doi-asserted-by":"crossref","unstructured":"Dai, J., et al.: Deformable convolutional networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 764\u2013773 (2017)","DOI":"10.1109\/ICCV.2017.89"},{"key":"24_CR5","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: ImageNet: a large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pp. 248\u2013255. IEEE (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"issue":"1","key":"24_CR6","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1109\/JBHI.2017.2659758","volume":"22","author":"B Fasel","year":"2017","unstructured":"Fasel, B., Sp\u00f6rri, J., Chardonnens, J., Kr\u00f6ll, J., M\u00fcller, E., Aminian, K.: Joint inertial sensor orientation drift reduction for highly dynamic movements. IEEE J. Biomed. Health Inform. 22(1), 77\u201386 (2017)","journal-title":"IEEE J. Biomed. Health Inform."},{"key":"24_CR7","doi-asserted-by":"crossref","unstructured":"Ge, L., et al.: 3D hand shape and pose estimation from a single RGB image. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10833\u201310842 (2019)","DOI":"10.1109\/CVPR.2019.01109"},{"key":"24_CR8","doi-asserted-by":"crossref","unstructured":"He, Y., Yan, R., Fragkiadaki, K., Yu, S.I.: Epipolar transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7779\u20137788 (2020)","DOI":"10.1109\/CVPR42600.2020.00780"},{"key":"24_CR9","doi-asserted-by":"crossref","unstructured":"Huang, Z., Wan, C., Probst, T., Van\u00a0Gool, L.: Deep learning on lie groups for skeleton-based action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6099\u20136108 (2017)","DOI":"10.1109\/CVPR.2017.137"},{"key":"24_CR10","doi-asserted-by":"crossref","unstructured":"Ionescu, C., Papava, D., Olaru, V., Sminchisescu, C.: Human3. 6M: large scale datasets and predictive methods for 3D human sensing in natural environments. IEEE Trans. Pattern Anal. Mach. Intell. 36(7), 1325\u20131339 (2013)","DOI":"10.1109\/TPAMI.2013.248"},{"key":"24_CR11","doi-asserted-by":"crossref","unstructured":"Iskakov, K., Burkov, E., Lempitsky, V., Malkov, Y.: Learnable triangulation of human pose. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7718\u20137727 (2019)","DOI":"10.1109\/ICCV.2019.00781"},{"key":"24_CR12","unstructured":"Kingma, D.P., Ba, J.: Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"24_CR13","doi-asserted-by":"crossref","unstructured":"Li, J., He, X., Zhou, C., Cheng, X., Wen, Y., Zhang, D.: ViewFormer: exploring spatiotemporal modeling for multi-view 3D occupancy perception via view-guided transformers. In: European Conference on Computer Vision, pp. 90\u2013106. Springer (2024)","DOI":"10.1007\/978-3-031-72775-7_6"},{"key":"24_CR14","doi-asserted-by":"crossref","unstructured":"Li, Y., et al.: TokenPose: learning keypoint tokens for human pose estimation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 11313\u201311322 (2021)","DOI":"10.1109\/ICCV48922.2021.01112"},{"key":"24_CR15","doi-asserted-by":"crossref","unstructured":"Liu, T., Ye, X., Zhao, W., Pan, Z., Shi, M., Cao, Z.: When Epipolar constraint meets non-local operators in multi-view stereo. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 18088\u201318097 (2023)","DOI":"10.1109\/ICCV51070.2023.01658"},{"key":"24_CR16","doi-asserted-by":"crossref","unstructured":"Ma, H., et al.: TransFusion: cross-view fusion with transformer for 3d human pose estimation. In: British Machine Vision Conference (2021)","DOI":"10.5244\/C.35.5"},{"key":"24_CR17","doi-asserted-by":"crossref","unstructured":"Ma, H., et al.: PPT: token-pruned pose transformer for monocular and multi-view human pose estimation. In: European Conference on Computer Vision, pp. 424\u2013442. Springer (2022)","DOI":"10.1007\/978-3-031-20065-6_25"},{"issue":"4","key":"24_CR18","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3072959.3073596","volume":"36","author":"D Mehta","year":"2017","unstructured":"Mehta, D., et al.: VNect: real-time 3D human pose estimation with a single RGB camera. ACM Trans. Graph. (TOG) 36(4), 1\u201314 (2017)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"24_CR19","doi-asserted-by":"crossref","unstructured":"Moon, G., Chang, J.Y., Lee, K.M.: V2V-PoseNet: voxel-to-voxel prediction network for accurate 3D hand and human pose estimation from a single depth map. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5079\u20135088 (2018)","DOI":"10.1109\/CVPR.2018.00533"},{"key":"24_CR20","doi-asserted-by":"crossref","unstructured":"Qiu, H., Wang, C., Wang, J., Wang, N., Zeng, W.: Cross view fusion for 3D human pose estimation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4342\u20134351 (2019)","DOI":"10.1109\/ICCV.2019.00444"},{"key":"24_CR21","doi-asserted-by":"crossref","unstructured":"Sun, K., Xiao, B., Liu, D., Wang, J.: Deep high-resolution representation learning for human pose estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5693\u20135703 (2019)","DOI":"10.1109\/CVPR.2019.00584"},{"key":"24_CR22","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"197","DOI":"10.1007\/978-3-030-58452-8_12","volume-title":"Computer Vision \u2013 ECCV 2020","author":"H Tu","year":"2020","unstructured":"Tu, H., Wang, C., Zeng, W.: VoxelPose: towards multi-camera 3D human pose estimation in wild environment. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12346, pp. 197\u2013212. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_12"},{"issue":"10","key":"24_CR23","doi-asserted-by":"publisher","first-page":"4540","DOI":"10.1109\/TIP.2016.2592800","volume":"25","author":"D Wang","year":"2016","unstructured":"Wang, D., Gao, X., Wang, X., He, L., Yuan, B.: Multimodal discriminative binary embedding for large-scale cross-modal retrieval. IEEE Trans. Image Process. 25(10), 4540\u20134554 (2016)","journal-title":"IEEE Trans. Image Process."},{"key":"24_CR24","doi-asserted-by":"publisher","first-page":"4909","DOI":"10.1109\/TMM.2022.3183830","volume":"25","author":"D Wang","year":"2022","unstructured":"Wang, D., Liu, S., Wang, Q., Tian, Y., He, L., Gao, X.: Cross-modal enhancement network for multimodal sentiment analysis. IEEE Trans. Multimedia 25, 4909\u20134921 (2022)","journal-title":"IEEE Trans. Multimedia"},{"key":"24_CR25","doi-asserted-by":"crossref","unstructured":"Wang, X., et al.: MVSTER: epipolar transformer for efficient multi-view stereo. In: European Conference on Computer Vision, pp. 573\u2013591. Springer (2022)","DOI":"10.1007\/978-3-031-19821-2_33"},{"key":"24_CR26","doi-asserted-by":"crossref","unstructured":"Wang, X., Girshick, R., Gupta, A., He, K.: Non-local neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7794\u20137803 (2018)","DOI":"10.1109\/CVPR.2018.00813"},{"key":"24_CR27","doi-asserted-by":"crossref","unstructured":"Wang, Z., Yang, J., Fowlkes, C.: The best of both worlds: combining model-based and nonparametric approaches for 3D human body estimation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2318\u20132327 (2022)","DOI":"10.1109\/CVPRW56347.2022.00258"},{"key":"24_CR28","doi-asserted-by":"crossref","unstructured":"Xiao, B., Wu, H., Wei, Y.: Simple baselines for human pose estimation and tracking. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 466\u2013481 (2018)","DOI":"10.1007\/978-3-030-01231-1_29"},{"key":"24_CR29","doi-asserted-by":"crossref","unstructured":"Yan, S., Xiong, Y., Lin, D.: Spatial temporal graph convolutional networks for skeleton-based action recognition. In: Proceedings of the AAAI Conference on Artificial Intelligence. vol.\u00a032 (2018)","DOI":"10.1609\/aaai.v32i1.12328"},{"key":"24_CR30","first-page":"13153","volume":"34","author":"J Zhang","year":"2021","unstructured":"Zhang, J., Cai, Y., Yan, S., Feng, J., et al.: Direct multi-view multi-person 3D pose estimation. Adv. Neural. Inf. Process. Syst. 34, 13153\u201313164 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"24_CR31","doi-asserted-by":"crossref","unstructured":"Zhang, X., Li, Q., Mo, H., Zhang, W., Zheng, W.: End-to-end hand mesh recovery from a monocular RGB image. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2354\u20132364 (2019)","DOI":"10.1109\/ICCV.2019.00244"},{"key":"24_CR32","doi-asserted-by":"publisher","first-page":"703","DOI":"10.1007\/s11263-020-01398-9","volume":"129","author":"Z Zhang","year":"2021","unstructured":"Zhang, Z., Wang, C., Qiu, W., Qin, W., Zeng, W.: AdaFuse: adaptive multiview fusion for accurate human pose estimation in the wild. Int. J. Comput. Vision 129, 703\u2013718 (2021)","journal-title":"Int. J. Comput. Vision"},{"key":"24_CR33","doi-asserted-by":"crossref","unstructured":"Zhou, B., Kr\u00e4henb\u00fchl, P.: Cross-view transformers for real-time map-view semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13760\u201313769 (2022)","DOI":"10.1109\/CVPR52688.2022.01339"},{"key":"24_CR34","doi-asserted-by":"crossref","unstructured":"Zhu, X., Hu, H., Lin, S., Dai, J.: Deformable ConvNets v2: more deformable, better results. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9308\u20139316 (2019)","DOI":"10.1109\/CVPR.2019.00953"},{"key":"24_CR35","doi-asserted-by":"crossref","unstructured":"Zimmermann, C., Ceylan, D., Yang, J., Russell, B., Argus, M., Brox, T.: FreiHAND: a dataset for Markerless capture of hand pose and shape from single RGB images. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 813\u2013822 (2019)","DOI":"10.1109\/ICCV.2019.00090"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-5676-2_24","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,12]],"date-time":"2026-01-12T20:32:12Z","timestamp":1768249932000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-5676-2_24"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819556755","9789819556762"],"references-count":35,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-5676-2_24","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"13 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Shanghai","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2025.prcv.cn\/index.asp","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}