{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:05:22Z","timestamp":1750309522963,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":53,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["2682022KJ044, 2682023ZTPY004"],"award-info":[{"award-number":["2682022KJ044, 2682023ZTPY004"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62372387, 61802053"],"award-info":[{"award-number":["62372387, 61802053"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Key R&D Program of Guangxi Zhuang Autonomous Region, China","award":["AB22080038, AB22080039"],"award-info":[{"award-number":["AB22080038, AB22080039"]}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100018542","name":"Natural Science Foundation of Sichuan Province","doi-asserted-by":"publisher","award":["2024NSFSC0508"],"award-info":[{"award-number":["2024NSFSC0508"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100018542","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3680844","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:27Z","timestamp":1729925967000},"page":"8219-8227","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["MagicCartoon: 3D Pose and Shape Estimation for Bipedal Cartoon Characters"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8847-6649","authenticated-orcid":false,"given":"Yu-Pei","family":"Song","sequence":"first","affiliation":[{"name":"Southwest Jiaotong University, Chengdu, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7155-5199","authenticated-orcid":false,"given":"Yuan-Tong","family":"Liu","sequence":"additional","affiliation":[{"name":"Southwest Jiaotong University, Chengdu, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8322-8558","authenticated-orcid":false,"given":"Xiao","family":"Wu","sequence":"additional","affiliation":[{"name":"Southwest Jiaotong University, Chengdu, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6109-9417","authenticated-orcid":false,"given":"Qi","family":"He","sequence":"additional","affiliation":[{"name":"Southwest Jiaotong University, Chengdu, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4083-5155","authenticated-orcid":false,"given":"Zhaoquan","family":"Yuan","sequence":"additional","affiliation":[{"name":"Southwest Jiaotong University, Chengdu, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3494-8062","authenticated-orcid":false,"given":"Ao","family":"Luo","sequence":"additional","affiliation":[{"name":"Southwest Jiaotong University, Chengdu, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/1186822.1073207"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"crossref","unstructured":"Federica Bogo Angjoo Kanazawa Christoph Lassner Peter Gehler Javier Romero et al. 2016. Keep It SMPL: Automatic Estimation of 3D Human Pose and Shape from a Single Image. In ECCV. 561--578.","DOI":"10.1007\/978-3-319-46454-1_34"},{"key":"e_1_3_2_1_3_1","volume-title":"Dreamavatar: Text-and-shape guided 3d human avatar generation via diffusion models. arXiv preprint arXiv:2304.00916","author":"Cao Yukang","year":"2023","unstructured":"Yukang Cao, Yan-Pei Cao, Kai Han, Ying Shan, and Kwan-Yee K Wong. 2023. Dreamavatar: Text-and-shape guided 3d human avatar generation via diffusion models. arXiv preprint arXiv:2304.00916 (2023)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"crossref","unstructured":"Shuhong Chen Kevin Zhang Yichun Shi Heng Wang Yiheng Zhu et al. 2023. PAniC-3D: Stylized Single-view 3D Reconstruction from Portraits of Anime Characters. In CVPR. 21068--21077.","DOI":"10.1109\/CVPR52729.2023.02018"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"crossref","unstructured":"Junhyeong Cho Kim Youwang and Tae-Hyun Oh. 2022. Cross-Attention of Disentangled Modalities for 3D Human Mesh Recovery with Transformers. In ECCV. 342--359.","DOI":"10.1007\/978-3-031-19769-7_20"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"Enric Corona Gerard Pons-Moll Guillem Aleny\u00e0 and Francesc Moreno-Noguer. 2022. Learned Vertex Descent: A New Direction for 3D Human Model Fitting. In ECCV. 146--195.","DOI":"10.1007\/978-3-031-20086-1_9"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","unstructured":"Jia Deng Wei Dong Richard Socher Li-Jia Li Kai Li and Li Fei-Fei. 2009. ImageNet: A large-scale hierarchical image database. In CVPR. 248--255.","DOI":"10.1109\/CVPRW.2009.5206848"},{"key":"e_1_3_2_1_8_1","volume-title":"Words: Transformers for Image Recognition at Scale. In ICLR.","author":"Dosovitskiy Alexey","year":"2021","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, et al. 2021. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. In ICLR."},{"key":"e_1_3_2_1_9_1","volume-title":"Jiyang Xie, Zhanyu Ma, et al.","author":"Du Ruoyi","year":"2020","unstructured":"Ruoyi Du, Dongliang Chang, Ayan Kumar Bhunia, Jiyang Xie, Zhanyu Ma, et al. 2020. Fine-Grained Visual Classification via Progressive Multi-granularity Training of Jigsaw Patches. In ECCV. 153--168."},{"key":"e_1_3_2_1_10_1","article-title":"Monster mash: a single-view approach to casual 3D modeling and animation","volume":"39","author":"Dvorovzvn\u00e1k Marek","year":"2020","unstructured":"Marek Dvorovzvn\u00e1k, Daniel S\u00fdkora, Cassidy Curtis, Brian Curless, Olga Sorkine-Hornung, et al. 2020. Monster mash: a single-view approach to casual 3D modeling and animation. ACM Trans. Graph., Vol. 39, 6 (2020).","journal-title":"ACM Trans. Graph."},{"volume-title":"MagicToon: A 2D-to-3D creative cartoon modeling system with mobile AR","author":"Feng Lele","key":"e_1_3_2_1_11_1","unstructured":"Lele Feng, Xubo Yang, and Shuangjiu Xiao. 2017. MagicToon: A 2D-to-3D creative cartoon modeling system with mobile AR. In IEEE VR. 195--204."},{"key":"e_1_3_2_1_12_1","unstructured":"Chris Fifty Ehsan Amid Zhe Zhao Tianhe Yu Rohan Anil et al. 2021. Efficiently Identifying Task Groupings for Multi-Task Learning. In NeurIPS. 27503--27516."},{"key":"e_1_3_2_1_13_1","unstructured":"Kaiming He Xiangyu Zhang Shaoqing Ren and Jian Sun. 2016. Deep Residual Learning for Image Recognition. In CVPR. 770--778."},{"key":"e_1_3_2_1_14_1","unstructured":"Jonathan Ho Ajay Jain and Pieter Abbeel. 2020. Denoising Diffusion Probabilistic Models. In NeurIPS. 6840--6851."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"crossref","unstructured":"Zihao Huang Min Shi Chengxin Liu Ke Xian and Zhiguo Cao. 2023. SimHMR: A Simple Query-based Framework for Parameterized Human Mesh Reconstruction. In ACM MM. 6918--6927.","DOI":"10.1145\/3581783.3611814"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.248"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"crossref","unstructured":"Boyi Jiang Yang Hong Hujun Bao and Juyong Zhang. 2022. SelfRecon: Self Reconstruction Your Digital Avatar From Monocular Video. In CVPR. 5605--5615.","DOI":"10.1109\/CVPR52688.2022.00552"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"Angjoo Kanazawa Michael J. Black David W. Jacobs and Jitendra Malik. 2018. End-to-End Recovery of Human Shape and Pose. In CVPR. 7122--7131.","DOI":"10.1109\/CVPR.2018.00744"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","unstructured":"Rawal Khirodkar Shashank Tripathi and Kris Kitani. 2022. Occluded Human Mesh Recovery. In CVPR. 1715--1725.","DOI":"10.1109\/CVPR52688.2022.00176"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"crossref","unstructured":"Jeonghwan Kim Mi-Gyeong Gwon Hyunwoo Park Hyukmin Kwon Gi-Mun Um et al. 2023. Sampling Is Matter: Point-Guided 3D Human Mesh Reconstruction. In CVPR. 12880--12889.","DOI":"10.1109\/CVPR52729.2023.01238"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"crossref","unstructured":"Nikos Kolotouros Georgios Pavlakos Michael J. Black and Kostas Daniilidis. 2019. Learning to Reconstruct 3D Human Pose and Shape via Model-Fitting in the Loop. In ICCV. 2252--2261.","DOI":"10.1109\/ICCV.2019.00234"},{"key":"e_1_3_2_1_22_1","unstructured":"Jiefeng Li Chao Xu Zhicun Chen Siyuan Bian Lixin Yang et al. 2021. HybrIK: A Hybrid Analytical-Neural Inverse Kinematics Solution for 3D Human Pose and Shape Estimation. In CVPR. 3383--3393."},{"key":"e_1_3_2_1_23_1","volume-title":"CLIFF: Carrying Location Information in Full Frames into Human Pose and Shape Estimation. In ECCV. 590--606.","author":"Li Zhihao","year":"2022","unstructured":"Zhihao Li, Jianzhuang Liu, Zhensong Zhang, Songcen Xu, and Youliang Yan. 2022. CLIFF: Carrying Location Information in Full Frames into Human Pose and Shape Estimation. In ECCV. 590--606."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"crossref","unstructured":"Kevin Lin Lijuan Wang and Zicheng Liu. 2021. End-to-End Human Pose and Mesh Reconstruction with Transformers. In CVPR. 1954--1963.","DOI":"10.1109\/CVPR46437.2021.00199"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"crossref","unstructured":"Kevin Lin Lijuan Wang and Zicheng Liu. 2021. Mesh Graphormer. In ICCV. 12939--12948.","DOI":"10.1109\/ICCV48922.2021.01270"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"crossref","unstructured":"Tsung-Yi Lin Michael Maire Serge Belongie James Hays Pietro Perona et al. 2014. Microsoft COCO: Common Objects in Context. In ECCV. 740--755.","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"e_1_3_2_1_27_1","article-title":"SMPL","volume":"34","author":"Loper Matthew","year":"2015","unstructured":"Matthew Loper, Naureen Mahmood, Javier Romero, Gerard Pons-Moll, and Michael J. Black. 2015. SMPL: A Skinned Multi-Person Linear Model. ACM Trans. Graph., Vol. 34, 6 (2015).","journal-title":"A Skinned Multi-Person Linear Model. ACM Trans. Graph."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"crossref","unstructured":"Zhongjin Luo Shengcai Cai Jinguo Dong Ruibo Ming Liangdong Qiu et al. 2023. RaBit: Parametric Modeling of 3D Biped Cartoon Characters with a Topological-consistent Dataset. In CVPR. 12825--12835.","DOI":"10.1109\/CVPR52729.2023.01233"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"crossref","unstructured":"Zhongjin Luo Jie Zhou Heming Zhu Dong Du Xiaoguang Han and Hongbo Fu. 2021. SimpModeling: Sketching Implicit Field to Guide Mesh Modeling for 3D Animalmorphic Head Design. In UIST. 854--863.","DOI":"10.1145\/3472749.3474791"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"crossref","unstructured":"Xiaoxuan Ma Jiajun Su Chunyu Wang Wentao Zhu and Yizhou Wang. 2023. 3D Human Mesh Estimation From Virtual Markers. In CVPR. 534--543.","DOI":"10.1109\/CVPR52729.2023.00059"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"crossref","unstructured":"Ben Mildenhall Pratul P. Srinivasan Matthew Tancik Jonathan T. Barron Ravi Ramamoorthi and Ren Ng. 2020. NeRF: Representing Scenes as Neural Radiance Fields for View Synthesis. In ECCV. 99--106.","DOI":"10.1145\/3503250"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"crossref","unstructured":"Gyeongsik Moon Hyeongjin Nam Takaaki Shiratori and Kyoung Mu Lee. 2022. 3D Clothed Human Reconstruction in the Wild. In ECCV. 184--200.","DOI":"10.1007\/978-3-031-20086-1_11"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"crossref","unstructured":"Jeong Joon Park Peter Florence Julian Straub Richard Newcombe and Steven Lovegrove. 2019. DeepSDF: Learning Continuous Signed Distance Functions for Shape Representation. In CVPR. 165--174.","DOI":"10.1109\/CVPR.2019.00025"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"crossref","unstructured":"Georgios Pavlakos Vasileios Choutas Nima Ghorbani Timo Bolkart Ahmed A. A. Osman et al. 2019. Expressive Body Capture: 3D Hands Face and Body From a Single Image. In CVPR. 10975--10985.","DOI":"10.1109\/CVPR.2019.01123"},{"key":"e_1_3_2_1_35_1","volume-title":"Guibas","author":"Qi Charles R.","year":"2017","unstructured":"Charles R. Qi, Li Yi, Hao Su, and Leonidas J. Guibas. 2017. PointNet: deep hierarchical feature learning on point sets in a metric space. In NeurIPS. 5105--5114."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"crossref","unstructured":"Yuda Qiu Xiaojie Xu Lingteng Qiu Yan Pan Yushuang Wu et al. 2021. 3DCaricShop: A Dataset and A Baseline Method for Single-view 3D Caricature Face Reconstruction. In CVPR. 10236--10245.","DOI":"10.1109\/CVPR46437.2021.01010"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/1778765.1778796"},{"key":"e_1_3_2_1_38_1","volume-title":"Black","author":"Saito Shunsuke","year":"2021","unstructured":"Shunsuke Saito, Jinlong Yang, Qianli Ma, and Michael J. Black. 2021. SCANimate: Weakly Supervised Learning of Skinned Clothed Avatar Networks. In CVPR. 2886--2897."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/TIM.2023.3298639","article-title":"LGSleepNet: An Automatic Sleep Staging Model Based on Local and Global Representation Learning","volume":"72","author":"Shen Qi","year":"2023","unstructured":"Qi Shen, Junchang Xin, Xinyao Liu, Zhongyang Wang, Chuangang Li, et al. 2023. LGSleepNet: An Automatic Sleep Staging Model Based on Local and Global Representation Learning. IEEE Trans. Instrum. Meas., Vol. 72 (2023), 1--14.","journal-title":"IEEE Trans. Instrum. Meas."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"crossref","unstructured":"Yu Sun Qian Bao Wu Liu Yili Fu Black Michael J. et al. 2021. Monocular One-stage Regression of Multiple 3D People. In ICCV. 11179--11188.","DOI":"10.1109\/ICCV48922.2021.01099"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3298850"},{"key":"e_1_3_2_1_42_1","first-page":"2579","article-title":"Visualizing Data using t-SNE","volume":"9","author":"van der Maaten Laurens","year":"2008","unstructured":"Laurens van der Maaten and Geoffrey Hinton. 2008. Visualizing Data using t-SNE. J. Mach. Learn. Res., Vol. 9, 86 (2008), 2579--2605.","journal-title":"J. Mach. Learn. Res."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.2983686"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"crossref","unstructured":"Chung-Yi Weng Brian Curless and Ira Kemelmacher-Shlizerman. 2019. Photo Wake-Up: 3D Character Animation From a Single Photo. In CVPR. 5901--5910.","DOI":"10.1109\/CVPR.2019.00606"},{"key":"e_1_3_2_1_45_1","volume-title":"Black","author":"Xiu Yuliang","year":"2023","unstructured":"Yuliang Xiu, Jinlong Yang, Xu Cao, Dimitrios Tzionas, and Michael J. Black. 2023. ECON: Explicit Clothed Humans Optimized via Normal Integration. In CVPR. 512--523."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"crossref","unstructured":"Youze Xue Jiansheng Chen Yudong Zhang Cheng Yu Huimin Ma et al. 2022. 3D Human Mesh Reconstruction by Learning to Sample Joint Adaptive Tokens for Transformers. In ACM MM. 6765--6773.","DOI":"10.1145\/3503161.3548133"},{"key":"e_1_3_2_1_47_1","unstructured":"Sen Yang Wen Heng Gang Liu Guozhong Luo Wankou Yang et al. 2023. Capturing the Motion of Every Joint: 3D Human Pose and Shape Estimation with Independent Tokens. In ICLR."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2014.2360406"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"crossref","unstructured":"Yusuke Yoshiyasu. 2023. Deformable Mesh Transformer for 3D Human Mesh Recovery. In CVPR. 17006--17015.","DOI":"10.1109\/CVPR52729.2023.01631"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"crossref","unstructured":"Andrei Zanfir Elisabeta Marinoiu and Cristian Sminchisescu. 2018. Monocular 3D Pose and Shape Estimation of Multiple People in Natural Scenes - The Importance of Multiple Scene Constraints. In CVPR. 2148--2157.","DOI":"10.1109\/CVPR.2018.00229"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"crossref","unstructured":"Wang Zeng Wanli Ouyang Ping Luo Wentao Liu and Xiaogang Wang. 2020. 3D Human Mesh Regression With Dense Correspondence. In CVPR. 7054--7063.","DOI":"10.1109\/CVPR42600.2020.00708"},{"key":"e_1_3_2_1_52_1","volume-title":"Avatarverse: High-quality & stable 3d avatar creation from text and pose. arXiv preprint arXiv:2308.03610","author":"Zhang Huichao","year":"2023","unstructured":"Huichao Zhang, Bowen Chen, Hao Yang, Liao Qu, Xu Wang, et al. 2023. Avatarverse: High-quality & stable 3d avatar creation from text and pose. arXiv preprint arXiv:2308.03610 (2023)."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"crossref","unstructured":"Hongwen Zhang Yating Tian Xinchi Zhou Wanli Ouyang Yebin Liu et al. 2021. PyMAF: 3D Human Pose and Shape Regression With Pyramidal Mesh Alignment Feedback Loop. In ICCV. 11446--11456.","DOI":"10.1109\/ICCV48922.2021.01125"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Melbourne VIC Australia","acronym":"MM '24"},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3680844","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3680844","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:08Z","timestamp":1750295888000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3680844"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":53,"alternative-id":["10.1145\/3664647.3680844","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3680844","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}