{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T18:50:04Z","timestamp":1755802204133,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":59,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,5,30]],"date-time":"2024-05-30T00:00:00Z","timestamp":1717027200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100006374","name":"National Science Foundation","doi-asserted-by":"publisher","award":["IIS-2309073,ECCS-212352101"],"award-info":[{"award-number":["IIS-2309073,ECCS-212352101"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,5,30]]},"DOI":"10.1145\/3652583.3658092","type":"proceedings-article","created":{"date-parts":[[2024,6,7]],"date-time":"2024-06-07T06:30:40Z","timestamp":1717741840000},"page":"423-432","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Monocular Expressive 3D Human Reconstruction of Multiple People"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-1934-4661","authenticated-orcid":false,"given":"Zhenghao","family":"Zhao","sequence":"first","affiliation":[{"name":"Illinois Institute of Technology, Chicago, IL, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2077-1246","authenticated-orcid":false,"given":"Hao","family":"Tang","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-4370-2192","authenticated-orcid":false,"given":"Joy","family":"Wan","sequence":"additional","affiliation":[{"name":"University of Illinois Urbana-Champaign, Urbana-Champaign, IL, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7618-119X","authenticated-orcid":false,"given":"Yan","family":"Yan","sequence":"additional","affiliation":[{"name":"Illinois Institute of Technology, Chicago, IL, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,6,7]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.471"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/1186822.1073207"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/311535.311556"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46454-1_34"},{"volume-title":"Virtual reality technology","author":"Burdea Grigore C","key":"e_1_3_2_1_5_1","unstructured":"Grigore C Burdea and Philippe Coiffet. 2003. Virtual reality technology. John Wiley & Sons."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.143"},{"key":"e_1_3_2_1_7_1","volume-title":"Augmented reality technologies, systems and applications. Multimedia tools and applications","author":"Carmigniani Julie","year":"2011","unstructured":"Julie Carmigniani, Borko Furht, Marco Anisetti, Paolo Ceravolo, Ernesto Damiani, and Misa Ivkovic. 2011. Augmented reality technologies, systems and applications. Multimedia tools and applications, Vol. 51, 1 (2011), 341--377."},{"key":"e_1_3_2_1_8_1","volume-title":"Single-image depth perception in the wild. Advances in neural information processing systems","author":"Chen Weifeng","year":"2016","unstructured":"Weifeng Chen, Zhao Fu, Dawei Yang, and Jia Deng. 2016. Single-image depth perception in the wild. Advances in neural information processing systems , Vol. 29 (2016)."},{"key":"e_1_3_2_1_9_1","volume-title":"3DCrowdNet: 2D Human Pose-Guided3D Crowd Human Pose and Shape Estimation in the Wild. arXiv preprint arXiv:2104.07300","author":"Choi Hongsuk","year":"2021","unstructured":"Hongsuk Choi, Gyeongsik Moon, JoonKyu Park, and Kyoung Mu Lee. 2021. 3DCrowdNet: 2D Human Pose-Guided3D Crowd Human Pose and Shape Estimation in the Wild. arXiv preprint arXiv:2104.07300 (2021)."},{"volume-title":"European Conference on Computer Vision (ECCV). 20--40","author":"Choutas Vasileios","key":"e_1_3_2_1_10_1","unstructured":"Vasileios Choutas, Georgios Pavlakos, Timo Bolkart, Dimitrios Tzionas, and Michael J. Black. 2020. Monocular Expressive Body Regression through Body-Driven Attention. In European Conference on Computer Vision (ECCV). 20--40. https:\/\/expose.is.tue.mpg.de"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2019.00038"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3395208"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV53792.2021.00088"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV53792.2021.00088"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3450626.3459936"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00326"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.322"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_19_1","volume-title":"6m: Large scale datasets and predictive methods for 3d human sensing in natural environments","author":"Ionescu Catalin","year":"2013","unstructured":"Catalin Ionescu, Dragos Papava, Vlad Olaru, and Cristian Sminchisescu. 2013. Human3. 6m: Large scale datasets and predictive methods for 3d human sensing in natural environments. IEEE transactions on pattern analysis and machine intelligence, Vol. 36, 7 (2013), 1325--1339."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00562"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV53792.2021.00015"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00744"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00176"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01228-1_15"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00234"},{"key":"e_1_3_2_1_26_1","volume-title":"Michael Bronstein, and Stefanos Zafeiriou.","author":"Kulon Dominik","year":"2019","unstructured":"Dominik Kulon, Haoyang Wang, Riza Alp G\u00fcler, Michael Bronstein, and Stefanos Zafeiriou. 2019. Single image 3d hand reconstruction with mesh convolutions. arXiv preprint arXiv:1905.01326 (2019)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9196596"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00339"},{"key":"e_1_3_2_1_29_1","volume-title":"Interacting Attention Graph for Single Image Two-Hand Reconstruction. arXiv preprint arXiv:2203.09364","author":"Li Mengcheng","year":"2022","unstructured":"Mengcheng Li, Liang An, Hongwen Zhang, Lianpeng Wu, Feng Chen, Tao Yu, and Yebin Liu. 2022. Interacting Attention Graph for Single Image Two-Hand Reconstruction. arXiv preprint arXiv:2203.09364 (2022)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3130800.3130813"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3130800.3130813"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/2816795.2818013"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW56347.2022.00257"},{"key":"e_1_3_2_1_35_1","volume-title":"Pose2pose: 3d positional pose-guided 3d rotational pose prediction for expressive 3d human pose and mesh estimation. arXiv preprint arXiv:2011.11534","author":"Moon Gyeongsik","year":"2020","unstructured":"Gyeongsik Moon and Kyoung Mu Lee. 2020. Pose2pose: 3d positional pose-guided 3d rotational pose prediction for expressive 3d human pose and mesh estimation. arXiv preprint arXiv:2011.11534 (2020)."},{"volume-title":"PyTorch: An Imperative Style","author":"Paszke Adam","key":"e_1_3_2_1_36_1","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas Kopf, Edward Yang, Zachary DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. PyTorch: An Imperative Style, High-Performance Deep Learning Library. In Advances in Neural Information Processing Systems 32, H. Wallach, H. Larochelle, A. Beygelzimer, F. dtextquotesingle Alch\u00e9-Buc, E. Fox, and R. Garnett (Eds.). Curran Associates, Inc., 8024--8035. http:\/\/papers.neurips.cc\/paper\/9015-pytorch-an-imperative-style-high-performance-deep-learning-library.pdf"},{"volume-title":"Proceedings IEEE\/CVF Conf. on Computer Vision and Pattern Recognition (CVPR).","author":"Patel Priyanka","key":"e_1_3_2_1_37_1","unstructured":"Priyanka Patel, Chun-Hao P. Huang, Joachim Tesch, David T. Hoffmann, Shashank Tripathi, and Michael J. Black. 2021. AGORA: Avatars in Geography Optimized for Regression Analysis. In Proceedings IEEE\/CVF Conf. on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01123"},{"volume-title":"Proceedings IEEE Conf. on Computer Vision and Pattern Recognition (CVPR). 10975--10985","author":"Pavlakos Georgios","key":"e_1_3_2_1_39_1","unstructured":"Georgios Pavlakos, Vasileios Choutas, Nima Ghorbani, Timo Bolkart, Ahmed A. A. Osman, Dimitrios Tzionas, and Michael J. Black. 2019b. Expressive Body Capture: 3D Hands, Face, and Body from a Single Image. In Proceedings IEEE Conf. on Computer Vision and Pattern Recognition (CVPR). 10975--10985."},{"key":"e_1_3_2_1_40_1","volume-title":"European Conference on Computer Vision. Springer, 572--578","author":"Pigou Lionel","year":"2014","unstructured":"Lionel Pigou, Sander Dieleman, Pieter-Jan Kindermans, and Benjamin Schrauwen. 2014. Sign language recognition using convolutional neural networks. In European Conference on Computer Vision. Springer, 572--578."},{"key":"e_1_3_2_1_41_1","volume-title":"Black","author":"Romero Javier","year":"2017","unstructured":"Javier Romero, Dimitrios Tzionas, and Michael J. Black. 2017. Embodied Hands: Modeling and Capturing Hands and Bodies Together. ACM Transactions on Graphics, (Proc. SIGGRAPH Asia), Vol. 36, 6 (Nov. 2017)."},{"key":"e_1_3_2_1_42_1","volume-title":"Embodied hands: Modeling and capturing hands and bodies together. arXiv preprint arXiv:2201.02610","author":"Romero Javier","year":"2022","unstructured":"Javier Romero, Dimitrios Tzionas, and Michael J Black. 2022. Embodied hands: Modeling and capturing hands and bodies together. arXiv preprint arXiv:2201.02610 (2022)."},{"key":"e_1_3_2_1_43_1","volume-title":"Frankmocap: Fast monocular 3d hand and body motion capture by regression and integration. arXiv preprint arXiv:2008.08324","author":"Rong Yu","year":"2020","unstructured":"Yu Rong, Takaaki Shiratori, and Hanbyul Joo. 2020. Frankmocap: Fast monocular 3d hand and body motion capture by regression and integration. arXiv preprint arXiv:2008.08324 (2020)."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00196"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01099"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01289"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00464"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00270"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00122"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01249-6_37"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00956"},{"key":"e_1_3_2_1_52_1","volume-title":"Parameter-free spatial attention network for person re-identification. arXiv preprint arXiv:1811.12150","author":"Wang Haoran","year":"2018","unstructured":"Haoran Wang, Yue Fan, Zexin Wang, Licheng Jiao, and Bernt Schiele. 2018. Parameter-free spatial attention network for person re-identification. arXiv preprint arXiv:1811.12150 (2018)."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58574-7_23"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00569"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01125"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00061"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00618"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00478"}],"event":{"name":"ICMR '24: International Conference on Multimedia Retrieval","sponsor":["SIGMM ACM Special Interest Group on Multimedia","SIGSOFT ACM Special Interest Group on Software Engineering"],"location":"Phuket Thailand","acronym":"ICMR '24"},"container-title":["Proceedings of the 2024 International Conference on Multimedia Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3652583.3658092","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3652583.3658092","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T08:45:54Z","timestamp":1755765954000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3652583.3658092"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,30]]},"references-count":59,"alternative-id":["10.1145\/3652583.3658092","10.1145\/3652583"],"URL":"https:\/\/doi.org\/10.1145\/3652583.3658092","relation":{},"subject":[],"published":{"date-parts":[[2024,5,30]]},"assertion":[{"value":"2024-06-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}