{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:45:13Z","timestamp":1765309513457,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":7,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["No. 92370119, and 62376113"],"award-info":[{"award-number":["No. 92370119, and 62376113"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3754462","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T06:54:17Z","timestamp":1761375257000},"page":"13486-13488","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["KDTalker++: Controllable Talking Portrait Generation with Audio, Text, and Expression Editing"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0396-6559","authenticated-orcid":false,"given":"Chaolong","family":"Yang","sequence":"first","affiliation":[{"name":"Xi'an Jiaotong-Liverpool University, Suzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-6867-6944","authenticated-orcid":false,"given":"Yinuo","family":"Guo","sequence":"additional","affiliation":[{"name":"Duke Kunshan University, Suzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4623-0365","authenticated-orcid":false,"given":"Kai","family":"Yao","sequence":"additional","affiliation":[{"name":"Ant Group, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2824-1431","authenticated-orcid":false,"given":"Yuyao","family":"Yan","sequence":"additional","affiliation":[{"name":"Xi'an Jiaotong-Liverpool University, Suzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4654-3081","authenticated-orcid":false,"given":"Jie","family":"Sun","sequence":"additional","affiliation":[{"name":"Xi'an Jiaotong-Liverpool University, Suzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3034-9639","authenticated-orcid":false,"given":"Kaizhu","family":"Huang","sequence":"additional","affiliation":[{"name":"Duke Kunshan University, Suzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Retrieved","author":"UI.","year":"2024","unstructured":"AdvancedLivePortrait-WebUI. 2024. AdvancedLivePortrait-WebUI. Retrieved July 23, 2025 from https:\/\/github.com\/jhj0517\/AdvancedLivePortrait-WebUI."},{"key":"e_1_3_2_2_2_1","unstructured":"Wei Deng Siyi Zhou Jingchen Shu Jinchao Wang and Lu Wang. 2025. IndexTTS: An Industrial-Level Controllable and Efficient Zero-Shot Text-To-Speech System. arXiv:2502.05512"},{"key":"e_1_3_2_2_3_1","unstructured":"Jianzhu Guo Dingyun Zhang Xiaoqiang Liu Zhizhou Zhong Yuan Zhang Pengfei Wan and Di Zhang. 2024. LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control. arXiv:2407.03168"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413532"},{"key":"e_1_3_2_2_5_1","first-page":"12077","volume-title":"Proceedings of the Conference on Neural Information Processing Systems (NeurIPS)","volume":"34","author":"Xie Enze","year":"2021","unstructured":"Enze Xie, Wenhai Wang, Zhiding Yu, Anima Anandkumar, Jose M Alvarez, and Ping Luo. 2021. SegFormer: Simple and efficient design for semantic segmentation with transformers. In Proceedings of the Conference on Neural Information Processing Systems (NeurIPS), Vol. 34. 12077-12090."},{"key":"e_1_3_2_2_6_1","unstructured":"Chaolong Yang Kai Yao Yuyao Yan Chenru Jiang Weiguang Zhao Jie Sun Guangliang Cheng Yifei Zhang Bin Dong and Kaizhu Huang. 2025. Unlock Pose Diversity: Accurate and Efficient Implicit Keypoint-based Spatiotemporal Diffusion for Audio-driven Talking Portrait. arXiv:2503.12963"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICME59968.2025.11208955"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3754462","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:40:16Z","timestamp":1765309216000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3754462"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":7,"alternative-id":["10.1145\/3746027.3754462","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3754462","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}