{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,18]],"date-time":"2026-02-18T23:46:09Z","timestamp":1771458369554,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":35,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T00:00:00Z","timestamp":1733184000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Key R&D Program of China","award":["2022YFF0902202"],"award-info":[{"award-number":["2022YFF0902202"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,12,3]]},"DOI":"10.1145\/3696409.3700261","type":"proceedings-article","created":{"date-parts":[[2024,12,28]],"date-time":"2024-12-28T09:55:23Z","timestamp":1735379723000},"page":"1-7","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["A Unified Editing Method for Co-Speech Gesture Generation via Diffusion Inversion"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-6612-9731","authenticated-orcid":false,"given":"Zeyu","family":"Zhao","sequence":"first","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences, Beijing, China and University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8277-4100","authenticated-orcid":false,"given":"Nan","family":"Gao","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2150-2088","authenticated-orcid":false,"given":"Zhi","family":"Zeng","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1072-8279","authenticated-orcid":false,"given":"Guixuan","family":"Zhang","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences, Beijing, China and Beijing University of Posts and Telecommunications, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8204-424X","authenticated-orcid":false,"given":"Jie","family":"Liu","sequence":"additional","affiliation":[{"name":"Institute of Automation, Chinese Academy of Sciences, Beijing, China and Beijing University of Posts and Telecommunications, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6013-6351","authenticated-orcid":false,"given":"Shuwu","family":"Zhang","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,12,28]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"crossref","unstructured":"Simon Alexanderson Rajmund Nagy Jonas Beskow and Gustav\u00a0Eje Henter. 2023. Listen denoise action! audio-driven motion synthesis with diffusion models. ACM Transactions on Graphics (TOG) 42 4 (2023) 1\u201320.","DOI":"10.1145\/3592458"},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"crossref","unstructured":"Tenglong Ao Qingzhe Gao Yuke Lou Baoquan Chen and Libin Liu. 2022. Rhythmic gesticulator: Rhythm-aware co-speech gesture synthesis with hierarchical neural embeddings. ACM Transactions on Graphics (TOG) 41 6 (2022) 1\u201319.","DOI":"10.1145\/3550454.3555435"},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"crossref","unstructured":"Tenglong Ao Zeyi Zhang and Libin Liu. 2023. Gesturediffuclip: Gesture diffusion model with clip latents. ACM Transactions on Graphics (TOG) 42 4 (2023) 1\u201318.","DOI":"10.1145\/3592097"},{"key":"e_1_3_3_2_5_2","unstructured":"Laurent Dinh Jascha Sohl-Dickstein and Samy Bengio. 2016. Density estimation using Real NVP. CoRR abs\/1605.08803 (2016). arXiv:https:\/\/arXiv.org\/abs\/1605.08803http:\/\/arxiv.org\/abs\/1605.08803"},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00361"},{"key":"e_1_3_3_2_7_2","unstructured":"Amir Hertz Ron Mokady Jay Tenenbaum Kfir Aberman Yael Pritch and Daniel Cohen-Or. 2022. Prompt-to-prompt image editing with cross attention control. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2208.01626 (2022)."},{"key":"e_1_3_3_2_8_2","unstructured":"Jonathan Ho Ajay Jain and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. Advances in neural information processing systems 33 (2020) 6840\u20136851."},{"key":"e_1_3_3_2_9_2","unstructured":"Edward\u00a0J Hu Yelong Shen Phillip Wallis Zeyuan Allen-Zhu Yuanzhi Li Shean Wang Lu Wang and Weizhu Chen. 2021. Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2106.09685 (2021)."},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"crossref","unstructured":"Korrawe Karunratanakul Konpat Preechakul Emre Aksan Thabo Beeler Supasorn Suwajanakorn and Siyu Tang. 2023. Optimizing Diffusion Noise Can Serve As Universal Motion Priors. arxiv:https:\/\/arXiv.org\/abs\/2312.11994\u00a0[cs.CV]","DOI":"10.1109\/CVPR52733.2024.00133"},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"publisher","DOI":"10.1145\/3577190.3616120"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","unstructured":"Sergey Levine Philipp Kr\u00e4henb\u00fchl Sebastian Thrun and Vladlen Koltun. 2010. Gesture Controllers. ACM Trans. Graph. 29 4 Article 124 (jul 2010) 11\u00a0pages. 10.1145\/1778765.1778861https:\/\/dl.acm.org\/doi\/10.1145\/1778765.1778861","DOI":"10.1145\/1778765.1778861"},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01110"},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20071-7_36"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01021"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"publisher","DOI":"10.1145\/2485895.2485900"},{"key":"e_1_3_3_2_17_2","unstructured":"Mehdi Mirza and Simon Osindero. 2014. Conditional Generative Adversarial Nets. arxiv:https:\/\/arXiv.org\/abs\/1411.1784\u00a0[cs.LG]"},{"key":"e_1_3_3_2_18_2","unstructured":"Evonne Ng Javier Romero Timur Bagautdinov Shaojie Bai Trevor Darrell Angjoo Kanazawa and Alexander Richard. 2024. From Audio to Photoreal Embodiment: Synthesizing Humans in Conversations. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.01885 (2024)."},{"key":"e_1_3_3_2_19_2","unstructured":"Simbarashe Nyatsanga Taras Kucherenko Chaitanya Ahuja Gustav\u00a0Eje Henter and Michael Neff. 2023. A Comprehensive Review of Data-Driven Co-Speech Gesture Generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2301.05339 (2023)."},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"publisher","DOI":"10.1145\/3588432.3591500"},{"key":"e_1_3_3_2_21_2","unstructured":"George Papamakarios Eric Nalisnick Danilo\u00a0Jimenez Rezende Shakir Mohamed and Balaji Lakshminarayanan. 2021. Normalizing flows for probabilistic modeling and inference. Journal of Machine Learning Research 22 57 (2021) 1\u201364."},{"key":"e_1_3_3_2_22_2","first-page":"2256","volume-title":"International conference on machine learning","author":"Sohl-Dickstein Jascha","year":"2015","unstructured":"Jascha Sohl-Dickstein, Eric Weiss, Niru Maheswaranathan, and Surya Ganguli. 2015. Deep unsupervised learning using nonequilibrium thermodynamics. In International conference on machine learning. PMLR, 2256\u20132265."},{"key":"e_1_3_3_2_23_2","unstructured":"Jiaming Song Chenlin Meng and Stefano Ermon. 2020. Denoising diffusion implicit models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2010.02502 (2020)."},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00669"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02158"},{"key":"e_1_3_3_2_26_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00230"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"publisher","DOI":"10.1145\/3577190.3616114"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20065-6_41"},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"publisher","unstructured":"Youngwoo Yoon Bok Cha Joo-Haeng Lee Minsu Jang Jaeyeon Lee Jaehong Kim and Geehyuk Lee. 2020. Speech Gesture Generation from the Trimodal Context of Text Audio and Speaker Identity. ACM Trans. Graph. 39 6 Article 222 (nov 2020) 16\u00a0pages. 10.1145\/3414685.3417838https:\/\/dl.acm.org\/doi\/10.1145\/3414685.3417838","DOI":"10.1145\/3414685.3417838"},{"key":"e_1_3_3_2_30_2","doi-asserted-by":"crossref","unstructured":"Aram You Jin\u00a0Kuk Kim Ik\u00a0Hee Ryu and Tae\u00a0Keun Yoo. 2022. Application of generative adversarial networks (GAN) for ophthalmology image domains: a survey. Eye and Vision 9 1 (2022) 6.","DOI":"10.1186\/s40662-022-00277-3"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"publisher","DOI":"10.1145\/3577190.3616118"},{"key":"e_1_3_3_2_33_2","doi-asserted-by":"publisher","DOI":"10.1109\/CoST57098.2022.00042"},{"key":"e_1_3_3_2_34_2","doi-asserted-by":"publisher","DOI":"10.1145\/3536221.3558063"},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00341"},{"key":"e_1_3_3_2_36_2","unstructured":"Lingting Zhu Xian Liu Xuanyu Liu Rui Qian Ziwei Liu and Lequan Yu. 2023. Taming Diffusion Models for Audio-Driven Co-Speech Gesture Generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2303.09119 (2023)."}],"event":{"name":"MMAsia '24: ACM Multimedia Asia","location":"Auckland New Zealand","acronym":"MMAsia '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 6th ACM International Conference on Multimedia in Asia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696409.3700261","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3696409.3700261","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:10:16Z","timestamp":1750295416000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696409.3700261"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,3]]},"references-count":35,"alternative-id":["10.1145\/3696409.3700261","10.1145\/3696409"],"URL":"https:\/\/doi.org\/10.1145\/3696409.3700261","relation":{},"subject":[],"published":{"date-parts":[[2024,12,3]]},"assertion":[{"value":"2024-12-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}