{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T07:12:08Z","timestamp":1778051528864,"version":"3.51.4"},"reference-count":66,"publisher":"IEEE","license":[{"start":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T00:00:00Z","timestamp":1772755200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T00:00:00Z","timestamp":1772755200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026,3,6]]},"DOI":"10.1109\/wacv61042.2026.00303","type":"proceedings-article","created":{"date-parts":[[2026,5,5]],"date-time":"2026-05-05T19:59:32Z","timestamp":1778011172000},"page":"3098-3108","source":"Crossref","is-referenced-by-count":0,"title":["TalkingPose: Efficient Face and Gesture Animation with Feedback-guided Diffusion Model"],"prefix":"10.1109","author":[{"given":"Alireza","family":"Javanmardi","sequence":"first","affiliation":[{"name":"German Research Center for Artificial Intelligence (DFKI)"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Pragati","family":"Jaiswal","sequence":"additional","affiliation":[{"name":"German Research Center for Artificial Intelligence (DFKI)"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tewodros Amberbir","family":"Habtegebrial","sequence":"additional","affiliation":[{"name":"German Research Center for Artificial Intelligence (DFKI)"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Christen","family":"Millerdurai","sequence":"additional","affiliation":[{"name":"German Research Center for Artificial Intelligence (DFKI)"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shaoxiang","family":"Wang","sequence":"additional","affiliation":[{"name":"German Research Center for Artificial Intelligence (DFKI)"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alain","family":"Pagani","sequence":"additional","affiliation":[{"name":"German Research Center for Artificial Intelligence (DFKI)"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Didier","family":"Stricker","sequence":"additional","affiliation":[{"name":"German Research Center for Artificial Intelligence (DFKI)"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/276"},{"key":"ref2","article-title":"Stable video diffusion: Scaling latent video diffusion models to large datasets","author":"Blattmann","year":"2023"},{"key":"ref3","article-title":"Magicpose: Realistic human poses and facial expressions retargeting with identity-aware diffusion","volume-title":"Forty-first International Conference on Machine Learning","author":"Chang"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-54427-4_19"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00482"},{"key":"ref6","first-page":"8780","article-title":"Diffusion models beat gans on image synthesis","volume":"34","author":"Dhariwal","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00675"},{"issue":"69","key":"ref8","first-page":"2383","article-title":"Waffles: A machine learning toolkit","volume":"12","author":"Gashler","year":"2011","journal-title":"Journal of Machine Learning Research"},{"key":"ref9","article-title":"Generative adversarial nets","volume":"27","author":"Goodfellow","year":"2014","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref10","article-title":"Animatediff: Animate your personalized text-to-image diffusion models without specific tuning","volume-title":"International Conference on Learning Representations","author":"Guo"},{"key":"ref11","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume":"33","author":"Ho","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"issue":"47","key":"ref12","first-page":"1","article-title":"Cascaded diffusion models for high fidelity image generation","volume":"23","author":"Ho","year":"2022","journal-title":"Journal of Machine Learning Research"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.52202\/068431-0628"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2010.579"},{"key":"ref15","first-page":"8153","article-title":"Animate anyone: Consistent and controllable image-to-video synthesis for character animation","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Hu"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51701.2025.00951"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00594"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01256"},{"key":"ref19","article-title":"Loopy: Taming audio-driven portrait avatar with long-term motion dependency","author":"Jiang","year":"2024"},{"key":"ref20","volume-title":"Linear Systems","author":"Kailath","year":"1980"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/3592433"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73202-7_19"},{"key":"ref23","article-title":"Auto-encoding variational bayes","volume-title":"2nd International Conference on Learning Representations (ICLR)","author":"Kingma"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00524"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/3130800.3130813"},{"key":"ref26","article-title":"Cyberhost: Taming audio-driven avatar diffusion model with region codebook attention","author":"Lin","year":"2024"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00780"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/2816795.2818013"},{"key":"ref29","article-title":"Mediapipe: A framework for perceiving and processing reality","volume-title":"Proceedings of the Third Workshop on Computer Vision for AR\/VR at IEEE Computer Vision and Pattern Recognition (CVPR)","author":"Lugaresi"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/3680528.3687587"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01973"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/3503250"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i5.28226"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01198"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01919"},{"key":"ref36","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"International conference on machine learning","author":"Radford"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00232"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"ref40","article-title":"Progressive distillation for fast sampling of diffusion models","volume-title":"International Conference on Learning Representations (ICLR)","author":"Salimans"},{"key":"ref41","article-title":"First order motion model for image animation","volume":"32","author":"Siarohin","year":"2019","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01344"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2006.09661"},{"key":"ref44","article-title":"Denoising diffusion implicit models","author":"Song","year":"2020"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1145\/2816795.2818056"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.262"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1163\/2213-2139_emc_emcsim_00933"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52734.2025.01965"},{"key":"ref49","article-title":"Towards accurate generative models of video: A new metric & challenges","volume-title":"International Conference on Learning Representations (ICLR)","author":"Unterthiner"},{"key":"ref50","article-title":"Attention is all you need","author":"Vaswani","year":"2017","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00891"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00991"},{"key":"ref53","article-title":"Magicvideo-v2: Multi-stage high-aesthetic video generation","author":"Wang","year":"2024"},{"key":"ref54","article-title":"Latent image animator: Learning to animate images via latent space navigation","volume-title":"International Conference on Learning Representations","author":"Wang"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2003.819861"},{"key":"ref56","article-title":"Understanding and stabilizing gans\u2019 training dynamics using control theory","volume-title":"Proceedings of the 37th International Conference on Machine Learning (ICML)","author":"Xu"},{"key":"ref57","article-title":"Vasa-1: Lifelike audio-driven talking faces generated in real time","author":"Xu","year":"2024"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00189"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00147"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19778-9_4"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW60793.2023.00455"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.73"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"ref64","article-title":"Mimicmotion: High-quality human motion video generation with confidence-aware pose guidance","volume-title":"International Conference on Machine Learning","author":"Zhang"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73001-6_9"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00444"}],"event":{"name":"2026 IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV)","location":"Tucson, AZ, USA","start":{"date-parts":[[2026,3,6]]},"end":{"date-parts":[[2026,3,10]]}},"container-title":["2026 IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11491838\/11491925\/11492563.pdf?arnumber=11492563","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T06:16:19Z","timestamp":1778048179000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11492563\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,6]]},"references-count":66,"URL":"https:\/\/doi.org\/10.1109\/wacv61042.2026.00303","relation":{},"subject":[],"published":{"date-parts":[[2026,3,6]]}}}