{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T21:08:06Z","timestamp":1773176886978,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":33,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"JiNan AI Computing Center"},{"name":"Shenzhen College Stability Support Plan","award":["No.:GXWD20220817144428005"],"award-info":[{"award-number":["No.:GXWD20220817144428005"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["No.:62006142, No.:U1936203, and No.:62236003"],"award-info":[{"award-number":["No.:62006142, No.:U1936203, and No.:62236003"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Special Fund for Distinguished Professors of Shandong Jianzhu University"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3612841","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:27:40Z","timestamp":1698391660000},"page":"9441-9445","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Towards Realistic Conversational Head Generation: A Comprehensive Framework for Lifelike Video Synthesis"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1582-5764","authenticated-orcid":false,"given":"Meng","family":"Liu","sequence":"first","affiliation":[{"name":"Shandong Jianzhu University, Jinan, 
China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7586-9232","authenticated-orcid":false,"given":"Yongqiang","family":"Li","sequence":"additional","affiliation":[{"name":"Shandong University, Tsingtao, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6990-9595","authenticated-orcid":false,"given":"Shuyan","family":"Zhai","sequence":"additional","affiliation":[{"name":"City University of Hong Kong, Hong Kong, Hong Kong"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5658-5509","authenticated-orcid":false,"given":"Weili","family":"Guan","sequence":"additional","affiliation":[{"name":"Monash University, Melbourne, VIC, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1476-0273","authenticated-orcid":false,"given":"Liqiang","family":"Nie","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology (Shenzhen), Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Speech driven video editing via an audio-conditioned diffusion model. arXiv preprint arXiv:2301.04474","author":"Bigioi Dan","year":"2023","unstructured":"Dan Bigioi, Shubhajit Basak, Hugh Jordan, Rachel McDonnell, and Peter Corcoran. 2023. Speech driven video editing via an audio-conditioned diffusion model. 
arXiv preprint arXiv:2301.04474 (2023), 1--12."},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-54427-4_19"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00482"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2019.00038"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2010.5700843"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00573"},{"key":"e_1_3_2_2_7_1","first-page":"2678","article-title":"Gans trained by a two time-scale update rule converge to a local nash equilibrium","volume":"30","author":"Heusel Martin","year":"2017","unstructured":"Martin Heusel, Hubert Ramsauer, Thomas Unterthiner, Bernhard Nessler, and Sepp Hochreiter. 2017. Gans trained by a two time-scale update rule converge to a local nash equilibrium. Advances in Neural Information Processing Systems, Vol. 30 (2017), 2678--2683.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-019-01150-y"},{"key":"e_1_3_2_2_9_1","volume-title":"Deep video portraits. ACM Transactions on Graphics","author":"Kim Hyeongwoo","year":"2018","unstructured":"Hyeongwoo Kim, Pablo Garrido, Ayush Tewari, Weipeng Xu, Justus Thies, Matthias Niessner, Patrick P\u00e9rez, Christian Richardt, Michael Zollh\u00f6fer, and Christian Theobalt. 2018. Deep video portraits. 
ACM Transactions on Graphics (2018), 1--14."},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3551569"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2011.2131660"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00049"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00244"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413532"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01350"},{"key":"e_1_3_2_2_16_1","volume-title":"Difftalk: Crafting diffusion models for generalized talking head synthesis. arXiv preprint arXiv:2301.03786","author":"Shen Shuai","year":"2023","unstructured":"Shuai Shen, Wenliang Zhao, Zibin Meng, Wanhua Li, Zheng Zhu, Jie Zhou, and Jiwen Lu. 2023. Difftalk: Crafting diffusion models for generalized talking head synthesis. arXiv preprint arXiv:2301.03786 (2023), 1--10."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIFS.2022.3146783"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/129"},{"key":"e_1_3_2_2_19_1","volume-title":"Diffused heads: Diffusion models beat gans on talking-face generation. arXiv preprint arXiv:2301.03396","author":"Stypu\u0142kowski Micha\u0142","year":"2023","unstructured":"Micha\u0142 Stypu\u0142kowski, Konstantinos Vougioukas, Sen He, Maciej Zi\u0119ba, Stavros Petridis, and Maja Pantic. 2023. Diffused heads: Diffusion models beat gans on talking-face generation. 
arXiv preprint arXiv:2301.03396 (2023), 1--10."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3072959.3073640"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/152"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2003.819861"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2020.3023573"},{"key":"e_1_3_2_2_24_1","volume-title":"Multimodal-driven talking face generation via a unified diffusion-based generator. CoRR","author":"Xu Chao","year":"2023","unstructured":"Chao Xu, Shaoting Zhu, Junwei Zhu, Tianxin Huang, Jiangning Zhang, Ying Tai, and Yong Liu. 2023. Multimodal-driven talking face generation via a unified diffusion-based generator. CoRR (2023), 1--14."},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3274676"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00836"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00366"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33019299"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00416"},{"key":"e_1_3_2_2_30_1","volume-title":"Interactive conversational head generation. arXiv preprint arXiv:2307.02090","author":"Zhou Mohan","year":"2023","unstructured":"Mohan Zhou, Yalong Bai, Wei Zhang, Ting Yao, and Tiejun Zhao. 2023. Interactive conversational head generation. arXiv preprint arXiv:2307.02090 (2023), 1--11."},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19839-7_8"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3414685.3417774","article-title":"MakeItTalk: speaker-aware talking-head animation","volume":"39","author":"Zhou Yang","year":"2020","unstructured":"Yang Zhou, Xintong Han, Eli Shechtman, Jose Echevarria, Evangelos Kalogerakis, and Dingzeyu Li. 
2020. MakeItTalk: speaker-aware talking-head animation. ACM Transactions on Graphics, Vol. 39, 6 (2020), 1--15.","journal-title":"ACM Transactions on Graphics"},{"key":"e_1_3_2_2_33_1","volume-title":"Proceedings of the International Conference on International Joint Conferences on Artificial Intelligence. 2362--2368","author":"Zhu Hao","year":"2021","unstructured":"Hao Zhu, Huaibo Huang, Yi Li, Aihua Zheng, and Ran He. 2021. Arbitrary talking face generation via attentional audio-visual coherence learning. In Proceedings of the International Conference on International Joint Conferences on Artificial Intelligence. 2362--2368."}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","location":"Ottawa ON Canada","acronym":"MM '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612841","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3612841","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:12:17Z","timestamp":1755821537000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612841"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":33,"alternative-id":["10.1145\/3581783.3612841","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3612841","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}