{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,24]],"date-time":"2025-12-24T18:20:30Z","timestamp":1766600430739},"reference-count":93,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2021,10,26]],"date-time":"2021-10-26T00:00:00Z","timestamp":1635206400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,10,26]],"date-time":"2021-10-26T00:00:00Z","timestamp":1635206400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2022,1]]},"DOI":"10.1007\/s11263-021-01525-0","type":"journal-article","created":{"date-parts":[[2021,10,26]],"date-time":"2021-10-26T14:02:37Z","timestamp":1635256957000},"page":"56-75","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":7,"title":["Learning a Robust Part-Aware Monocular 3D Human Pose Estimator via Neural Architecture Search"],"prefix":"10.1007","volume":"130","author":[{"given":"Zerui","family":"Chen","sequence":"first","affiliation":[]},{"given":"Yan","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Hongyuan","family":"Yu","sequence":"additional","affiliation":[]},{"given":"Liang","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,10,26]]},"reference":[{"key":"1525_CR1","doi-asserted-by":"crossref","unstructured":"Alldieck, T,. Pons-Moll, G., Theobalt, C., & Magnor, M. (2019). Tex2shape: Detailed full human body geometry from a single image. In IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2019.00238"},{"key":"1525_CR2","doi-asserted-by":"crossref","unstructured":"Andriluka, M., Pishchulin, L., Gehler, P., & Schiele, B. (2014). 2d human pose estimation: New benchmark and state of the art analysis. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2014.471"},{"key":"1525_CR3","doi-asserted-by":"crossref","unstructured":"Baek, S., Kim, K. I., & Kim, T. K. (2018). Augmented skeleton space transfer for depth-based hand pose estimation. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2018.00869"},{"key":"1525_CR4","unstructured":"Baker, B., Gupta, O., Naik, N., & Raskar, R. (2017). Designing neural network architectures using reinforcement learning. In International conference on learning representations (ICLR)."},{"key":"1525_CR5","doi-asserted-by":"crossref","unstructured":"Belagiannis, V., Amin, S., Andriluka, M., Schiele, B., Navab, N., & Ilic, S. (2014). 3d pictorial structures for multiple human pose estimation. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2014.216"},{"key":"1525_CR6","doi-asserted-by":"crossref","unstructured":"Bogo, F., Kanazawa, A., Lassner, C., Gehler, P., Romero, J., & Black, M. J. (2016). Keep it smpl: Automatic estimation of 3d human pose and shape from a single image. In European conference on computer vision (ECCV).","DOI":"10.1007\/978-3-319-46454-1_34"},{"key":"1525_CR7","doi-asserted-by":"crossref","unstructured":"Burenius, M., Sullivan, J., & Carlsson, S. (2013). 3d pictorial structures for multiple view articulated pose estimation. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2013.464"},{"key":"1525_CR8","doi-asserted-by":"crossref","unstructured":"Cai, Y., Ge, L., Liu, J., Cai, J., Cham, T. J., Yuan, J., & Thalmann, N. M. (2019b). Exploiting spatial-temporal relationships for 3d pose estimation via graph convolutional networks. In IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2019.00236"},{"key":"1525_CR9","unstructured":"Cai, H., Zhu, L., & Han, S. (2019a). Proxylessnas: Direct neural architecture search on target task and hardware. In International conference on learning representations (ICLR)."},{"key":"1525_CR10","doi-asserted-by":"crossref","unstructured":"Chen, C. H., & Ramanan, D. (2017). 3d human pose estimation = 2d pose estimation + matching. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2017.610"},{"key":"1525_CR11","doi-asserted-by":"crossref","unstructured":"Chen, L. C., Collins, M., Zhu, Y., Papandreou, G., Zoph, B., Schroff, F., Adam, H., & Shlens, J. (2018). Searching for efficient multi-scale architectures for dense image prediction. In Conference on neural information processing systems (NeurIPS).","DOI":"10.1007\/978-3-030-04167-0"},{"key":"1525_CR12","unstructured":"Chen, Z., Guo, Y., Huang, Y., & Liang, W. (2019b). Learning depth-aware heatmaps for 3d human pose estimation in the wild. In British machine vision conference (BMVC)."},{"key":"1525_CR13","doi-asserted-by":"crossref","unstructured":"Chen, Z., Huang, Y., Yu, H., Xue, B., Han, K., Guo, Y., & Wang, L. (2020). Towards part-aware monocular 3d human pose estimation: An architecture search approach. In European conference on computer vision (ECCV).","DOI":"10.1007\/978-3-030-58580-8_42"},{"key":"1525_CR14","unstructured":"Chen, Y., Yang, T., Zhang, X., Meng, G., Xiao, X., & Sun, J. (2019a). Detnas: Backbone search for object detection. In Conference on neural information processing systems (NeurIPS)."},{"key":"1525_CR15","doi-asserted-by":"crossref","unstructured":"Ci, H., Wang, C., Ma, X., & Wang, Y. (2019). Optimizing network structure for 3d human pose estimation. In IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2019.00235"},{"key":"1525_CR16","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L. J., Li, K., & Fei-Fei, L. (2009). Imagenet: A large-scale hierarchical image database. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"1525_CR17","doi-asserted-by":"crossref","unstructured":"Divvala, S. K., Efros, A. A., & Hebert, M. (2012). How important are \u201cdeformable parts\u201d in the deformable parts model? In European conference on computer vision (ECCV).","DOI":"10.1007\/978-3-642-33885-4_4"},{"key":"1525_CR18","doi-asserted-by":"crossref","unstructured":"Fabbri, M., Lanzi, F., Calderara, S., Alletto, S., & Cucchiara, R. (2020). Compressed volumetric heatmaps for multi-person 3d pose estimation. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR42600.2020.00723"},{"key":"1525_CR19","doi-asserted-by":"crossref","unstructured":"Fang, H., Xu, Y., Wang, W., Liu, X., & Zhu, S. C. (2018). Learning knowledge-guided pose grammar machine for 3d human pose estimation. In AAAI conference on artificial intelligence (AAAI).","DOI":"10.1609\/aaai.v32i1.12270"},{"key":"1525_CR20","doi-asserted-by":"crossref","unstructured":"Felzenszwalb, P. F., Girshick, R. B., McAllester, D., & Ramanan, D. (2009). Object detection with discriminatively trained part-based models. IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI) 32, 1627\u20131645.","DOI":"10.1109\/TPAMI.2009.167"},{"key":"1525_CR21","doi-asserted-by":"crossref","unstructured":"Ganapathi, V., Plagemann, C., Koller, D., & Thrun, S. (2010). Real time motion capture using a single time-of-flight camera. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2010.5540141"},{"key":"1525_CR22","doi-asserted-by":"crossref","unstructured":"Ghiasi, G., Lin, T. Y., & Le, Q. V. (2019). Nas-fpn: Learning scalable feature pyramid architecture for object detection. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2019.00720"},{"key":"1525_CR23","doi-asserted-by":"crossref","unstructured":"Guo, Z., Zhang, X., Mu, H., Heng, W., Liu, Z., Wei, Y., & Sun, J. (2020). Single path one-shot neural architecture search with uniform sampling. In European conference on computer vision (ECCV).","DOI":"10.1007\/978-3-030-58517-4_32"},{"key":"1525_CR24","doi-asserted-by":"crossref","unstructured":"Gupta, A., Martinez, J., Little. J. J., & Woodham, R. J. (2014). 3d pose from motion for cross-view action recognition via non-linear circulant temporal encoding. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2014.333"},{"key":"1525_CR25","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., & Girshick, R. (2017). Mask r-cnn. In IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2017.322"},{"key":"1525_CR26","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep residual learning for image recognition. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2016.90"},{"key":"1525_CR27","doi-asserted-by":"crossref","unstructured":"Hossain, M. R. I., & Little, J. J. (2018). Exploiting temporal information for 3d human pose estimation. In European conference on computer vision (ECCV).","DOI":"10.1007\/978-3-030-01249-6_5"},{"key":"1525_CR28","doi-asserted-by":"crossref","unstructured":"Howard, A., Sandler, M., Chu, G., Chen, L. C., Chen, B., Tan, M., Wang, W., Zhu, Y., Pang, R., Vasudevan, V., Le, Q. V. (2019). Searching for mobilenetv3. In IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2019.00140"},{"key":"1525_CR29","doi-asserted-by":"crossref","unstructured":"Ionescu, C., Papava, D., Olaru, V., & Sminchisescu, C. (2014). Human3.6m: Large scale datasets and predictive methods for 3d human sensing in natural environments. In IEEE transactions on pattern analysis and machine intelligence (TPAMI).","DOI":"10.1109\/TPAMI.2013.248"},{"key":"1525_CR30","doi-asserted-by":"crossref","unstructured":"Iskakov, K., Burkov, E., Lempitsky, V., & Malkov, Y. (2019). Learnable triangulation of human pose. In IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2019.00781"},{"key":"1525_CR31","doi-asserted-by":"crossref","unstructured":"Jiang, H. (2010). 3d human pose reconstruction using millions of exemplars. In International conference on pattern recognition (ICPR)","DOI":"10.1109\/ICPR.2010.414"},{"key":"1525_CR32","doi-asserted-by":"crossref","unstructured":"Jiang, W., Kolotouros, N., Pavlakos, G., Zhou, X., & Daniilidis, K. (2020). Coherent reconstruction of multiple humans from a single image. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR42600.2020.00562"},{"key":"1525_CR33","doi-asserted-by":"crossref","unstructured":"Joo, H., Liu, H., Tan, L., Gui, L., Nabbe, B., Matthews, I., Kanade, T., Nobuhara, S., & Sheikh, Y. (2015). Panoptic studio: A massively multiview system for social motion capture. In IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2015.381"},{"key":"1525_CR34","doi-asserted-by":"crossref","unstructured":"Kanazawa, A., Black, M. J., Jacobs, D. W., & Malik, J. (2018). End-to-end recovery of human shape and pose. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2018.00744"},{"key":"1525_CR35","unstructured":"Kingma, D. P., & Ba, J. (2014). Adam: A method for stochastic optimization. In International conference on learning representations (ICLR)."},{"key":"1525_CR36","doi-asserted-by":"crossref","unstructured":"Kocabas, M., Karagoz, S., & Akbas, E. (2019). Self-supervised learning of 3d human pose using multi-view geometry. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2019.00117"},{"key":"1525_CR37","doi-asserted-by":"crossref","unstructured":"Kolotouros, N., Pavlakos, G., & Daniilidis, K. (2019). Convolutional mesh regression for single-image human shape reconstruction. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2019.00463"},{"key":"1525_CR38","doi-asserted-by":"crossref","unstructured":"Lee, H. J., & Chen, Z. (1985). Determination of 3d human body postures from a single view. In Computer vision, graphics, and image processing (CVGIP).","DOI":"10.1016\/0734-189X(85)90137-9"},{"key":"1525_CR39","unstructured":"Li, S., & Chan, A. B. (2014). 3d human pose estimation from monocular images with deep convolutional neural network. In Asian conference on computer vision (ACCV)."},{"key":"1525_CR40","doi-asserted-by":"crossref","unstructured":"Lin, T. Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., & Zitnick, C. L. (2014). Microsoft coco: Common objects in context. In European conference on computer vision (ECCV).","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"1525_CR41","doi-asserted-by":"crossref","unstructured":"Liu, C., Chen, L. C., Schroff, F., Adam, H., Hua, W., Yuille, A. L., & Fei-Fei, L. (2019a). Auto-deeplab: Hierarchical neural architecture search for semantic image segmentation. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2019.00017"},{"key":"1525_CR42","unstructured":"Liu, H., Simonyan, K., & Yang, Y. (2019b). Darts: Differentiable architecture search. In International conference on learning representations (ICLR)."},{"key":"1525_CR43","unstructured":"Loshchilov, I., & Hutter, F. (2019). Decoupled weight decay regularization. In International conference on learning representations (ICLR)."},{"key":"1525_CR44","volume-title":"Information theory, inference and learning algorithms","author":"DJ MacKay","year":"2003","unstructured":"MacKay, D. J., & Mac Kay, D. J. (2003). Information theory, inference and learning algorithms. Cambridge University Press."},{"key":"1525_CR45","doi-asserted-by":"crossref","unstructured":"Martinez, J., Hossain, R., Romero, J., & Little, J. J. (2017). A simple yet effective baseline for 3d human pose estimation. In IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2017.288"},{"key":"1525_CR46","doi-asserted-by":"crossref","unstructured":"Mehta, D., Rhodin, H., Casas, D., Fua, P., Sotnychenko, O., Xu, W., & Theobalt, C. (2017). Monocular 3d human pose estimation in the wild using improved cnn supervision. In International conference on 3D vision (3DV).","DOI":"10.1109\/3DV.2017.00064"},{"key":"1525_CR47","doi-asserted-by":"crossref","unstructured":"Mehta, D., Sotnychenko, O., Mueller, F., Xu, W., Sridhar, S., Pons-Moll, G., & Theobalt, C. (2018). Single-shot multi-person 3d pose estimation from monocular rgb. In International conference on 3D vision (3DV).","DOI":"10.1109\/3DV.2018.00024"},{"key":"1525_CR48","doi-asserted-by":"crossref","unstructured":"Moon, G., Chang, J. Y., & Lee, K. M. (2019). Camera distance-aware top-down approach for 3d multi-person pose estimation from a single rgb image. In IEEE international conference on computer vision (ICCV)","DOI":"10.1109\/ICCV.2019.01023"},{"key":"1525_CR49","doi-asserted-by":"crossref","unstructured":"Moreno-Noguer, F. (2017). 3d human pose estimation from a single image via distance matrix regression. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2017.170"},{"key":"1525_CR50","doi-asserted-by":"crossref","unstructured":"Mueller, F., Davis, M., Bernard, F., Sotnychenko, O., Verschoor, M., Otaduy, M. A., et al. (2019). Real-time pose and shape reconstruction of two interacting hands with a single depth camera. ACM Transactions on Graphics (TOG) 38, 1\u201313.","DOI":"10.1145\/3306346.3322958"},{"key":"1525_CR51","doi-asserted-by":"crossref","unstructured":"Natsume, R., Saito, S., Huang, Z., Chen, W., Ma, C., Li, H., & Morishima, S. (2019). Siclope: Silhouette-based clothed people. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2019.00461"},{"key":"1525_CR52","doi-asserted-by":"crossref","unstructured":"Newell, A., Yang, K., & Deng, J. (2016). Stacked hourglass networks for human pose estimation. In European conference on computer vision (ECCV).","DOI":"10.1007\/978-3-319-46484-8_29"},{"key":"1525_CR53","doi-asserted-by":"crossref","unstructured":"Nibali, A., He, Z., Morgan, S., & Prendergast, L. (2019). 3d human pose estimation with 2d marginal heatmaps. In IEEE winter conference on applications of computer vision (WACV).","DOI":"10.1109\/WACV.2019.00162"},{"key":"1525_CR54","doi-asserted-by":"crossref","unstructured":"Omran, M., Lassner, C., Pons-Moll, G., Gehler, P., & Schiele, B. (2018). Neural body fitting: Unifying deep learning and model based human pose and shape estimation. In International conference on 3D vision (3DV).","DOI":"10.1109\/3DV.2018.00062"},{"key":"1525_CR55","doi-asserted-by":"crossref","unstructured":"Park, S., Hwang, J., & Kwak, N. (2016). 3d human pose estimation using convolutional neural networks with 2d pose information. In European conference on computer vision (ECCV).","DOI":"10.1007\/978-3-319-49409-8_15"},{"key":"1525_CR56","doi-asserted-by":"crossref","unstructured":"Pavlakos, G., Zhou, X., & Daniilidis, K. (2018). Ordinal depth supervision for 3d human pose estimation. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2018.00763"},{"key":"1525_CR57","doi-asserted-by":"crossref","unstructured":"Pavlakos, G., Zhou, X., Derpanis, K. G., & Daniilidis, K. (2017a). Coarse-to-fine volumetric prediction for single-image 3d human pose. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2017.139"},{"key":"1525_CR58","doi-asserted-by":"crossref","unstructured":"Pavlakos, G., Zhou, X., Derpanis, K. G., & Daniilidis, K. (2017b). Harvesting multiple views for marker-less 3d human pose annotations. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2017.138"},{"key":"1525_CR59","unstructured":"Pedregosa, F., Varoquaux, G., Gramfort, A., Michel, V., Thirion, B., Grisel, O., et al. (2011). Scikit-learn: Machine learning in Python. Journal of Machine Learning Research (JMLR) 12, 2825\u20132830."},{"key":"1525_CR60","unstructured":"Peng, J., Sun, M., Zhang, Z., Tan, T., & Yan, J. (2019). Efficient neural architecture transformation search in channel-level for object detection. In Conference on neural information processing systems (NeurIPS)."},{"key":"1525_CR61","doi-asserted-by":"crossref","unstructured":"Qiu, H., Wang, C., Wang, J., Wang, N., & Zeng, W. (2019). Cross view fusion for 3d human pose estimation. In IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2019.00444"},{"key":"1525_CR62","doi-asserted-by":"crossref","unstructured":"Rhodin, H., Sp\u00f6rri, J., Katircioglu, I., Constantin, V., Meyer, F., M\u00fcller, E., Salzmann, M., & Fua, P. (2018). Learning monocular 3d human pose estimation from multi-view images. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2018.00880"},{"key":"1525_CR63","unstructured":"Rogez, G., & Schmid, C. (2016). Mocap-guided data augmentation for 3d pose estimation in the wild. In Conference on neural information processing systems (NeurIPS)."},{"key":"1525_CR64","doi-asserted-by":"crossref","unstructured":"Rogez, G., Weinzaepfel, P., & Schmid, C. (2017). Lcr-net: Localization-classification-regression for human pose. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2017.134"},{"key":"1525_CR65","doi-asserted-by":"crossref","unstructured":"Romero, J., Tzionas, D., & Black, M. J. (2017). Embodied hands: Modeling and capturing hands and bodies together. In ACM transactions on graphics (TOG).","DOI":"10.1145\/3130800.3130883"},{"key":"1525_CR66","unstructured":"S\u00e1r\u00e1ndi, I., Linder, T., Arras, K. O., & Leibe, B. (2018). Synthetic occlusion augmentation with volumetric heatmaps for the 2018 eccv posetrack challenge on 3d human pose estimation. In Workshop at european conference on computer vision (ECCVW)."},{"key":"1525_CR67","doi-asserted-by":"crossref","unstructured":"S\u00e1r\u00e1ndi, I., Linder, T., Arras, K., & Leibe, B. (2020). Metric-scale truncation-robust heatmaps for 3d human pose estimation. In IEEE international conference on automatic face and gesture recognition (FG).","DOI":"10.1109\/FG47880.2020.00108"},{"key":"1525_CR68","doi-asserted-by":"crossref","unstructured":"Shotton, J., Fitzgibbon, A., Cook, M., Sharp, T., Finocchio, M., Moore, R., Kipman, A., & Blake, A. (2011). Real-time human pose recognition in parts from single depth images. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2011.5995316"},{"key":"1525_CR69","doi-asserted-by":"crossref","unstructured":"Sun, X., Shang, J., Liang, S., & Wei, Y. (2017). Compositional human pose regression. In IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2017.284"},{"key":"1525_CR70","doi-asserted-by":"crossref","unstructured":"Sun, K., Xiao, B., Liu, D., & Wang, J. (2019). Deep high-resolution representation learning for human pose estimation. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2019.00584"},{"key":"1525_CR71","doi-asserted-by":"crossref","unstructured":"Sun, X., Xiao, B., Wei, F., Liang, S., & Wei, Y. (2018). Integral human pose regression. In European conference on computer vision (ECCV).","DOI":"10.1109\/ICCV.2017.284"},{"key":"1525_CR72","doi-asserted-by":"crossref","unstructured":"Tan, M., Chen, B., Pang, R., Vasudevan, V., Sandler, M., Howard, A., & Le, Q. V. (2019). Mnasnet: Platform-aware neural architecture search for mobile. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2019.00293"},{"key":"1525_CR73","doi-asserted-by":"crossref","unstructured":"Tang, W., & Wu, Y. (2019). Does learning specific features for related parts help human pose estimation? In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2019.00120"},{"key":"1525_CR74","doi-asserted-by":"crossref","unstructured":"Tang, W., Yu, P., & Wu, Y. (2018). Deeply learned compositional models for human pose estimation. In European conference on computer vision (ECCV).","DOI":"10.1007\/978-3-030-01219-9_12"},{"key":"1525_CR75","doi-asserted-by":"crossref","unstructured":"Taylor, C. J. (2000). Reconstruction of articulated objects from point correspondences in a single uncalibrated image. In Computer vision and image understanding (CVIU).","DOI":"10.1006\/cviu.2000.0878"},{"key":"1525_CR76","doi-asserted-by":"crossref","unstructured":"Tome, D., Russell, C., & Agapito, L. (2017). Lifting from the deep: Convolutional 3d pose estimation from a single image. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2017.603"},{"key":"1525_CR77","doi-asserted-by":"crossref","unstructured":"Tu, H., Wang, C., & Zeng, W. (2020). Voxelpose: Towards multi-camera 3d human pose estimation in wild environment. In European conference on computer vision (ECCV).","DOI":"10.1007\/978-3-030-58452-8_12"},{"key":"1525_CR78","doi-asserted-by":"crossref","unstructured":"Varol, G., Ceylan, D., Russell, B., Yang, J., Yumer, E., Laptev, I., & Schmid, C. (2018). Bodynet: Volumetric inference of 3d human body shapes. In European conference on computer vision (ECCV).","DOI":"10.1007\/978-3-030-01234-2_2"},{"key":"1525_CR79","doi-asserted-by":"crossref","unstructured":"Varol, G., Romero, J., Martin, X., Mahmood, N., Black, M. J., Laptev, I., & Schmid, C. (2017). Learning from synthetic humans. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2017.492"},{"key":"1525_CR80","doi-asserted-by":"crossref","unstructured":"Wang, J., Huang, S., Wang, X., & Tao, D. (2019). Not all parts are created equal: 3d pose estimation by modeling bi-directional dependencies of body parts. In IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2019.00786"},{"key":"1525_CR81","doi-asserted-by":"crossref","unstructured":"Wei, S. E., Ramakrishna, V., Kanade, T., & Sheikh, Y. (2016). Convolutional pose machines. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2016.511"},{"key":"1525_CR82","doi-asserted-by":"crossref","unstructured":"Wu, X., Finnegan, D., O\u2019Neill, E., & Yang. Y. L. (2018). Handmap: Robust hand pose estimation via intermediate dense guidance map supervision. In European conference on computer vision (ECCV).","DOI":"10.1007\/978-3-030-01270-0_15"},{"key":"1525_CR83","doi-asserted-by":"crossref","unstructured":"Xiong, F., Zhang, B., Xiao, Y., Cao, Z., Yu, T., Zhou, J. T., & Yuan, J. (2019). A2j: Anchor-to-joint regression network for 3d articulated pose estimation from a single depth image. In IEEE international conference on computer vision (ICCV)","DOI":"10.1109\/ICCV.2019.00088"},{"key":"1525_CR84","unstructured":"Xu, Y., Xie, L., Zhang, X., Chen, X., Qi, G. J., Tian, Q., & Xiong, H. (2020). Pc-darts: Partial channel connections for memory-efficient architecture search. In International Conference on Learning Representations (ICLR)."},{"key":"1525_CR85","doi-asserted-by":"crossref","unstructured":"Yang, W., Ouyang, W., Wang, X., Ren, J., Li, H., & Wang, X. (2018). 3d human pose estimation in the wild by adversarial learning. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2018.00551"},{"key":"1525_CR86","doi-asserted-by":"crossref","unstructured":"Yasin, H., Iqbal, U., Kruger, B., Weber, A., & Gall, J. (2016). A dual-source approach for 3d pose estimation from a single image. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2016.535"},{"key":"1525_CR87","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Qiu, Z., Liu, J., Yao, T., Liu, D., & Mei, T. (2019). Customizable architecture search for semantic segmentation. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2019.01191"},{"key":"1525_CR88","doi-asserted-by":"crossref","unstructured":"Zheng, Z., Yu, T., Wei, Y., Dai, Q., & Liu, Y. (2019). Deephuman: 3d human reconstruction from a single image. In IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2019.00783"},{"key":"1525_CR89","doi-asserted-by":"crossref","unstructured":"Zhou, K., Han, X., Jiang, N., Jia, K., & Lu, J. (2019). Hemlets pose: Learning part-centric heatmap triplets for accurate 3d human pose estimation. In IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2019.00243"},{"key":"1525_CR90","doi-asserted-by":"crossref","unstructured":"Zhou, X., Huang, Q., Sun, X., Xue, X., & Wei, Y. (2017). Towards 3d human pose estimation in the wild: a weakly-supervised approach. In IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2017.51"},{"key":"1525_CR91","doi-asserted-by":"crossref","unstructured":"Zhou, X., Zhu, M., Leonardos, S., Derpanis, K. G., & Daniilidis, K. (2016). Sparseness meets deepness: 3d human pose estimation from monocular video. In IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2016.537"},{"key":"1525_CR92","doi-asserted-by":"crossref","unstructured":"Zhou, X., Zhu, M., Pavlakos, G., Leonardos, S., Derpanis, K. G., & Daniilidis, K. (2018). Monocap: Monocular human motion capture using a cnn coupled with a geometric prior. In IEEE transactions on pattern analysis and machine intelligence (TPAMI).","DOI":"10.1109\/TPAMI.2018.2816031"},{"key":"1525_CR93","unstructured":"Zoph, B., & Le, Q. V. (2017). Neural architecture search with reinforcement learning. In International Conference on Learning Representations (ICLR)."}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-021-01525-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-021-01525-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-021-01525-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,13]],"date-time":"2023-01-13T21:31:18Z","timestamp":1673645478000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-021-01525-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,10,26]]},"references-count":93,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2022,1]]}},"alternative-id":["1525"],"URL":"https:\/\/doi.org\/10.1007\/s11263-021-01525-0","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,10,26]]},"assertion":[{"value":"23 October 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 September 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 October 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}