{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,16]],"date-time":"2026-06-16T04:42:41Z","timestamp":1781584961230,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3680693","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:27Z","timestamp":1729925967000},"page":"7085-7093","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":23,"title":["HandRefiner: Refining Malformed Hands in Generated Images by Diffusion-based Conditional Inpainting"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-6503-5447","authenticated-orcid":false,"given":"Wenquan","family":"Lu","sequence":"first","affiliation":[{"name":"The University of Sydney, Sydney, Australia"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9931-5138","authenticated-orcid":false,"given":"Yufei","family":"Xu","sequence":"additional","affiliation":[{"name":"The University of Sydney, Sydney, Australia"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6595-7661","authenticated-orcid":false,"given":"Jing","family":"Zhang","sequence":"additional","affiliation":[{"name":"The University of Sydney, Sydney, Australia"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9002-1029","authenticated-orcid":false,"given":"Chaoyue","family":"Wang","sequence":"additional","affiliation":[{"name":"The University of Sydney, Sydney, Australia"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7225-5449","authenticated-orcid":false,"given":"Dacheng","family":"Tao","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore, Singapore"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2024. Midjourney. https:\/\/www.midjourney.com\/. Accessed: 2024-02-13."},{"key":"e_1_3_2_1_2_1","unstructured":"Synthesis AI. 2023. Animated Gestures Dataset. data retrieved from Synthesis AI https:\/\/synthesis.ai\/animated-gestures-dataset\/."},{"key":"e_1_3_2_1_3_1","unstructured":"Synthesis AI. 2023. Static Gestures Dataset. data retrieved from Synthesis AI https:\/\/synthesis.ai\/static-gestures-dataset\/."},{"key":"e_1_3_2_1_4_1","unstructured":"Shariq Farooq Bhat Reiner Birkl DianaWofk PeterWonka and Matthias M\u00fcller. 2023. ZoeDepth: Zero-shot Transfer by Combining Relative and Metric Depth. arXiv:2302.12288 [cs.CV]"},{"key":"e_1_3_2_1_5_1","volume-title":"Demystifying MMD GANs. In International Conference on Learning Representations.","author":"Bi\u0144kowski Miko\u0141aj","year":"2018","unstructured":"Miko\u0141aj Bi\u0144kowski, Danica J. Sutherland, Michael Arbel, and Arthur Gretton. 2018. Demystifying MMD GANs. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.143"},{"key":"e_1_3_2_1_7_1","volume-title":"Wortman Vaughan (Eds.)","volume":"34","author":"Dhariwal Prafulla","year":"2021","unstructured":"Prafulla Dhariwal and Alexander Nichol. 2021. Diffusion Models Beat GANs on Image Synthesis. In Advances in Neural Information Processing Systems, M. Ranzato, A. Beygelzimer, Y. Dauphin, P.S. Liang, and J. Wortman Vaughan (Eds.), Vol. 34. Curran Associates, Inc., 8780--8794. https:\/\/proceedings.neurips.cc\/ paper_files\/paper\/2021\/file\/49ad23d1ec9fa4bd8d77d02681df5cfa-Paper.pdf"},{"key":"e_1_3_2_1_8_1","volume-title":"Proceedings of the 2003 ACM SIGGRAPH\/Eurographics Symposium on Computer Animation","author":"ElKoura George","year":"2003","unstructured":"George ElKoura and Karan Singh. 2003. Handrix: Animating the Human Hand. In Proceedings of the 2003 ACM SIGGRAPH\/Eurographics Symposium on Computer Animation (San Diego, California) (SCA '03). 110--119."},{"key":"e_1_3_2_1_9_1","unstructured":"Martin Heusel Hubert Ramsauer Thomas Unterthiner Bernhard Nessler and Sepp Hochreiter. 2017. GANs Trained by a Two Time-Scale Update Rule Converge to a Local Nash Equilibrium. In Advances In Neural Information Processing Systems."},{"key":"e_1_3_2_1_10_1","unstructured":"Jonathan Ho Ajay Jain and Pieter Abbeel. 2020. Denoising Diffusion Probabilistic Models. In Advances In Neural Information Processing Systems."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.248"},{"key":"e_1_3_2_1_12_1","unstructured":"Alexander Kapitanov Andrew Makhlyarchuk and Karina Kvanchiani. 2022. Ha-GRID -HAnd Gesture Recognition Image Dataset. arXiv:2206.08219 [cs.CV]"},{"key":"e_1_3_2_1_13_1","volume-title":"BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation. In International Conference on Machine Learning.","author":"Li Junnan","year":"2022","unstructured":"Junnan Li, Dongxu Li, Caiming Xiong, and Steven Hoi. 2022. BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation. In International Conference on Machine Learning."},{"key":"e_1_3_2_1_14_1","volume-title":"Mesh Graphormer. In Proceedings of the IEEE International Conference on Computer Vision.","author":"Lin Kevin","year":"2021","unstructured":"Kevin Lin, Lijuan Wang, and Zicheng Liu. 2021. Mesh Graphormer. In Proceedings of the IEEE International Conference on Computer Vision."},{"key":"e_1_3_2_1_15_1","volume-title":"Decoupled Weight Decay Regularization. In International Conference on Learning Representations.","author":"Loshchilov Ilya","year":"2019","unstructured":"Ilya Loshchilov and Frank Hutter. 2019. Decoupled Weight Decay Regularization. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_16_1","volume-title":"MediaPipe: A Framework for Building Perception Pipelines. In Third Workshop on Computer Vision for AR\/VR.","author":"Lugaresi Camillo","year":"2019","unstructured":"Camillo Lugaresi, Jiuqiang Tang, Hadon Nash, Chris McClanahan, Esha Uboweja, Michael Hays, Fan Zhang, Chuo-Ling Chang, Ming Guang Yong, Juhyun Lee, Wan-Teh Chang, Wei Hua, Manfred Georg, and Matthias Grundmann. 2019. MediaPipe: A Framework for Building Perception Pipelines. In Third Workshop on Computer Vision for AR\/VR."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01117"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"Chong Mou Xintao Wang Liangbin Xie Yanze Wu Jian Zhang Zhongang Qi Ying Shan and Xiaohu Qie. 2023. T2I-Adapter: Learning Adapters to Dig out More Controllable Ability for Text-to-Image Diffusion Models. arXiv:2302.08453 [cs.CV]","DOI":"10.1609\/aaai.v38i5.28226"},{"key":"e_1_3_2_1_19_1","volume-title":"Proceedings of the IEEE International Conference on Computer Vision Workshops.","author":"Narasimhaswamy Supreeth","year":"2023","unstructured":"Supreeth Narasimhaswamy, Uttaran Bhattacharya, Xiang Chen, Ishita Dasgupta, and Saayan Mitra. 2023. Text-to-Hand-Image Generation Using Pose- and Mesh-Guided Diffusion. In Proceedings of the IEEE International Conference on Computer Vision Workshops."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00239"},{"key":"e_1_3_2_1_21_1","volume-title":"Proceedings of the 39th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"16804","author":"Nichol Alexander Quinn","year":"2022","unstructured":"Alexander Quinn Nichol, Prafulla Dhariwal, Aditya Ramesh, Pranav Shyam, Pamela Mishkin, Bob Mcgrew, Ilya Sutskever, and Mark Chen. 2022. GLIDE: Towards Photorealistic Image Generation and Editing with Text-Guided Diffusion Models. In Proceedings of the 39th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 162), Kamalika Chaudhuri, Stefanie Jegelka, Le Song, Csaba Szepesvari, Gang Niu, and Sivan Sabato (Eds.). PMLR, 16784--16804. https:\/\/proceedings.mlr.press\/v162\/nichol22a.html"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00387"},{"key":"e_1_3_2_1_23_1","volume-title":"SDXL: Improving Latent Diffusion Models for High-Resolution Image Synthesis.","author":"Podell Dustin","year":"2023","unstructured":"Dustin Podell, Zion English, Kyle Lacey, Andreas Blattmann, Tim Dockhorn, Jonas M\u00fcller, Joe Penna, and Robin Rombach. 2023. SDXL: Improving Latent Diffusion Models for High-Resolution Image Synthesis."},{"key":"e_1_3_2_1_24_1","volume-title":"Advances In Neural Information Processing Systems","volume":"32","author":"Qiao Tingting","year":"2019","unstructured":"Tingting Qiao, Jing Zhang, Duanqing Xu, and Dacheng Tao. 2019. Learn, Imagine and Create: Text-to-Image Generation from Prior Knowledge. In Advances In Neural Information Processing Systems, Vol. 32."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00160"},{"key":"e_1_3_2_1_26_1","unstructured":"Aditya Ramesh Prafulla Dhariwal Alex Nichol Casey Chu and Mark Chen. 2022. Hierarchical Text-Conditional Image Generation with CLIP Latents. arXiv:2204.06125 [cs.CV]"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.3019967"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3130800.3130883"},{"key":"e_1_3_2_1_30_1","volume-title":"Oh (Eds.)","volume":"35","author":"Saharia Chitwan","year":"2022","unstructured":"Chitwan Saharia, William Chan, Saurabh Saxena, Lala Li, Jay Whang, Emily L Denton, Kamyar Ghasemipour, Raphael Gontijo Lopes, Burcu Karagol Ayan, Tim Salimans, Jonathan Ho, David J Fleet, and Mohammad Norouzi. 2022. Photorealistic Text-to-Image Diffusion Models with Deep Language Understanding. In Advances in Neural Information Processing Systems, S. Koyejo, S. Mohamed, A. Agarwal, D. Belgrave, K. Cho, and A. Oh (Eds.), Vol. 35. Curran Associates, Inc., 36479--36494. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2022\/file\/ ec795aeadae0b7d230fa35cbaf04c041-Paper-Conference.pdf"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"crossref","unstructured":"Dvir Samuel Rami Ben-Ari Simon Raviv Nir Darshan and Gal Chechik. 2023. Generating Images of Rare Concepts Using Pre-trained Diffusion Models. arXiv:2304.14530 [cs.CV]","DOI":"10.1609\/aaai.v38i5.28270"},{"key":"e_1_3_2_1_32_1","volume-title":"International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"2265","author":"Sohl-Dickstein Jascha","year":"2015","unstructured":"Jascha Sohl-Dickstein, Eric Weiss, Niru Maheswaranathan, and Surya Ganguli. 2015. Deep Unsupervised Learning using Nonequilibrium Thermodynamics. In International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 37), Francis Bach and David Blei (Eds.). PMLR, Lille, France, 2256--2265. https:\/\/proceedings.mlr.press\/v37\/sohl-dickstein15.html"},{"key":"e_1_3_2_1_33_1","volume-title":"Denoising Diffusion Implicit Models. In International Conference on Learning Representations. https: \/\/openreview.net\/forum?id=St1giarCHLP","author":"Song Jiaming","year":"2021","unstructured":"Jiaming Song, Chenlin Meng, and Stefano Ermon. 2021. Denoising Diffusion Implicit Models. In International Conference on Learning Representations. https: \/\/openreview.net\/forum?id=St1giarCHLP"},{"key":"e_1_3_2_1_34_1","volume-title":"International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=PxTIG12RRHS","author":"Song Yang","year":"2021","unstructured":"Yang Song, Jascha Sohl-Dickstein, Diederik P Kingma, Abhishek Kumar, Stefano Ermon, and Ben Poole. 2021. Score-Based Generative Modeling through Stochastic Differential Equations. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=PxTIG12RRHS"},{"key":"e_1_3_2_1_35_1","unstructured":"Zhenzhen Weng Laura Bravo-S\u00e1nchez and Serena Yeung. 2023. Diffusion-HPC: Generating Synthetic Images with Realistic Humans. arXiv:2303.09541 [cs.CV]"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00366"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02153"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00027"},{"key":"e_1_3_2_1_39_1","volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops.","author":"Zhang Fan","year":"2020","unstructured":"Fan Zhang, Valentin Bazarevsky, Andrey Vakunov, Andrei Tkachenka, George Sung, Chuo-Ling Chang, and Matthias Grundmann. 2020. MediaPipe Hands: On-device Real-time Hand Tracking. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_2_1_41_1","unstructured":"Shihao Zhao Dongdong Chen Yen-Chun Chen Jianmin Bao Shaozhe Hao Lu Yuan and Kwan-Yee K Wong. 2023. Uni-ControlNet: All-in-One Control to Text-to-Image Diffusion Models. In Advances In Neural Information Processing Systems."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00090"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3680693","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3680693","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:57Z","timestamp":1750295877000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3680693"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":42,"alternative-id":["10.1145\/3664647.3680693","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3680693","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}