{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T08:44:32Z","timestamp":1776156272553,"version":"3.50.1"},"reference-count":45,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2024YFE0200700"],"award-info":[{"award-number":["2024YFE0200700"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2024YFE0200703"],"award-info":[{"award-number":["2024YFE0200703"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Pattern Recognition"],"published-print":{"date-parts":[[2026,11]]},"DOI":"10.1016\/j.patcog.2026.113495","type":"journal-article","created":{"date-parts":[[2026,3,21]],"date-time":"2026-03-21T07:32:44Z","timestamp":1774078364000},"page":"113495","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"PA","title":["Multimodal human video generation with uncertainty-aware pose guidance"],"prefix":"10.1016","volume":"179","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8283-0646","authenticated-orcid":false,"given":"Jingyi","family":"Wu","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9047-5362","authenticated-orcid":false,"given":"Han","family":"Yu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9956-2200","authenticated-orcid":false,"given":"Kun","family":"Yang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0003-4636-1880","authenticated-orcid":false,"given":"Yuanyuan","family":"Meng","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0004-0345-1787","authenticated-orcid":false,"given":"Juncen","family":"Guo","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7344-2174","authenticated-orcid":false,"given":"Yanda","family":"Meng","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0810-1458","authenticated-orcid":false,"given":"Songwen","family":"Pei","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2819-0200","authenticated-orcid":false,"given":"Jing","family":"Liu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1312-0146","authenticated-orcid":false,"given":"Yang","family":"Liu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8143-9052","authenticated-orcid":false,"given":"Liang","family":"Song","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.patcog.2026.113495_bib0001","series-title":"ICCV","first-page":"22680","article-title":"DreamPose: fashion video synthesis with stable diffusion","author":"Karras","year":"2023"},{"key":"10.1016\/j.patcog.2026.113495_bib0002","series-title":"CVPR","first-page":"10684","article-title":"High-resolution image synthesis with latent diffusion models","author":"Rombach","year":"2022"},{"key":"10.1016\/j.patcog.2026.113495_bib0003","series-title":"CVPR","first-page":"9326","article-title":"DISCO: disentangled control for realistic human dance generation","author":"Wang","year":"2024"},{"issue":"7","key":"10.1016\/j.patcog.2026.113495_bib0004","doi-asserted-by":"crossref","first-page":"8076","DOI":"10.1109\/TITS.2024.3376455","article-title":"Imagery overlap block compressive sensing with convex optimization","volume":"25","author":"Zhao","year":"2024","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"10.1016\/j.patcog.2026.113495_bib0005","series-title":"ICML","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"Radford","year":"2021"},{"key":"10.1016\/j.patcog.2026.113495_bib0006","series-title":"ICCV","first-page":"3836","article-title":"Adding conditional control to text-to-image diffusion models","author":"Zhang","year":"2023"},{"key":"10.1016\/j.patcog.2026.113495_bib0007","first-page":"1","article-title":"AMLPF-CLIP: adaptive prompting and distilled learning for imbalanced histopathological image classification","author":"Yao","year":"2025","journal-title":"IEEE J. Biomed. Health Inform."},{"key":"10.1016\/j.patcog.2026.113495_bib0008","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2023.110132","article-title":"HairManip: high quality hair manipulation via hair element disentangling","volume":"147","author":"Zhao","year":"2024","journal-title":"Pattern Recognit."},{"issue":"2","key":"10.1016\/j.patcog.2026.113495_bib0009","doi-asserted-by":"crossref","first-page":"2843","DOI":"10.1109\/TII.2023.3298476","article-title":"AMP-net: appearance-motion prototype network assisted automatic video anomaly detection system","volume":"20","author":"Liu","year":"2023","journal-title":"IEEE Trans. Ind. Inf."},{"key":"10.1016\/j.patcog.2026.113495_bib0010","series-title":"CVPR","first-page":"8153","article-title":"Animate anyone: consistent and controllable image-to-video synthesis for character animation","author":"Hu","year":"2024"},{"key":"10.1016\/j.patcog.2026.113495_bib0011","unstructured":"D. Chang, Y. Shi, Q. Gao, J. Fu, H. Xu, G. Song, Q. Yan, X. Yang, M. Soleymani, MagicDance: realistic human dance video generation with motions & facial expressions transfer, (2023). arXiv: 2311.12052."},{"key":"10.1016\/j.patcog.2026.113495_bib0012","series-title":"CVPR","first-page":"1481","article-title":"MagicAnimate: temporally consistent human image animation using diffusion model","author":"Xu","year":"2024"},{"key":"10.1016\/j.patcog.2026.113495_bib0013","doi-asserted-by":"crossref","DOI":"10.1016\/j.compeleceng.2023.109049","article-title":"TellMeTalk: multimodal-driven talking face video generation","volume":"114","author":"Li","year":"2024","journal-title":"Comput. Electr. Eng."},{"key":"10.1016\/j.patcog.2026.113495_bib0014","series-title":"BMVC","article-title":"DwNet: dense warp-based network for pose-guided human video generation","author":"Zablotskaia","year":"2019"},{"key":"10.1016\/j.patcog.2026.113495_bib0015","series-title":"CVPR","first-page":"12753","article-title":"Learning high fidelity depths of dressed humans by watching social media dance videos","author":"Jafarian","year":"2021"},{"key":"10.1016\/j.patcog.2026.113495_bib0016","series-title":"CVPR","first-page":"13653","article-title":"Motion representations for articulated animation","author":"Siarohin","year":"2021"},{"key":"10.1016\/j.patcog.2026.113495_bib0017","doi-asserted-by":"crossref","first-page":"5358","DOI":"10.1109\/TMM.2022.3190700","article-title":"LIQA: Lifelong blind image quality assessment","volume":"25","author":"Liu","year":"2022","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.patcog.2026.113495_bib0018","series-title":"ACL","first-page":"1309","article-title":"NUWA-XL: diffusion over diffusion for eXtremely long video generation","author":"Yin","year":"2023"},{"key":"10.1016\/j.patcog.2026.113495_bib0019","series-title":"ICML","first-page":"1737","article-title":"MultiDiffusion: fusing diffusion paths for controlled image generation","volume":"202","author":"Bar-Tal","year":"2023"},{"key":"10.1016\/j.patcog.2026.113495_bib0020","series-title":"SIGGRAPH Asia 2024 Conference Papers","first-page":"1","article-title":"Lumiere: a space-time diffusion model for video generation","author":"Bar-Tal","year":"2024"},{"key":"10.1016\/j.patcog.2026.113495_bib0021","doi-asserted-by":"crossref","first-page":"361","DOI":"10.1016\/j.patcog.2016.05.030","article-title":"Robust arbitrary view gait recognition based on parametric 3D human body reconstruction and virtual posture synthesis","volume":"60","author":"Luo","year":"2016","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.113495_bib0022","series-title":"CVPR","first-page":"5933","article-title":"Everybody dance now","author":"Chan","year":"2019"},{"key":"10.1016\/j.patcog.2026.113495_bib0023","series-title":"NeurIPS","first-page":"7135","article-title":"First order motion model for image animation","author":"Siarohin","year":"2019"},{"key":"10.1016\/j.patcog.2026.113495_bib0024","doi-asserted-by":"crossref","first-page":"5049","DOI":"10.1109\/COMST.2026.3669216","article-title":"Edge-cloud collaborative computing on distributed intelligence and model optimization: a survey","volume":"28","author":"Liu","year":"2026","journal-title":"IEEE Commun. Surv. Tutorials"},{"key":"10.1016\/j.patcog.2026.113495_bib0025","series-title":"ECCV","first-page":"561","article-title":"Keep it SMPL: automatic estimation of 3D human pose and shape from a single image","author":"Bogo","year":"2016"},{"key":"10.1016\/j.patcog.2026.113495_bib0026","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2024.110758","article-title":"Efficient neural implicit representation for 3D human reconstruction","volume":"156","author":"Huang","year":"2024","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2026.113495_bib0027","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2023.110986","article-title":"Stochastic video normality network for abnormal event detection in surveillance videos","volume":"280","author":"Liu","year":"2023","journal-title":"Knowl. Based Syst."},{"key":"10.1016\/j.patcog.2026.113495_bib0028","series-title":"ICLR","article-title":"Human motion diffusion model","author":"Tevet","year":"2023"},{"issue":"6","key":"10.1016\/j.patcog.2026.113495_bib0029","doi-asserted-by":"crossref","first-page":"4115","DOI":"10.1109\/TPAMI.2024.3355414","article-title":"MotionDiffuse: text-driven human motion generation with diffusion model","volume":"46","author":"Zhang","year":"2024","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.patcog.2026.113495_bib0030","series-title":"ICML","first-page":"32939","article-title":"HumanTOMATO: text-aligned whole-body motion generation","author":"Lu","year":"2024"},{"key":"10.1016\/j.patcog.2026.113495_bib0031","series-title":"WACV","first-page":"5081","article-title":"Human motion aware text-to-video generation with explicit camera control","author":"Kim","year":"2024"},{"key":"10.1016\/j.patcog.2026.113495_bib0032","article-title":"AnimateDiff: animate your personalized text-to-image diffusion models without specific tuning","author":"Guo","year":"2024","journal-title":"ICLR"},{"key":"10.1016\/j.patcog.2026.113495_bib0033","first-page":"1","article-title":"Networking systems for video anomaly detection: a tutorial and survey","volume":"57","author":"Liu","year":"2025","journal-title":"ACM Comput. Surv."},{"key":"10.1016\/j.patcog.2026.113495_bib0034","series-title":"ICML","first-page":"6263","article-title":"MagicPose: realistic human poses and facial expressions retargeting with identity-aware diffusion","author":"Chang","year":"2024"},{"key":"10.1016\/j.patcog.2026.113495_bib0035","series-title":"ECCV","first-page":"145","article-title":"Champ: controllable and consistent human image animation with 3d parametric guidance","author":"Zhu","year":"2024"},{"key":"10.1016\/j.patcog.2026.113495_bib0036","doi-asserted-by":"crossref","first-page":"2351","DOI":"10.1109\/TIP.2025.3558089","article-title":"CRCL: causal representation consistency learning for anomaly detection in surveillance videos","volume":"34","author":"Liu","year":"2025","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.patcog.2026.113495_bib0037","series-title":"ICML","first-page":"12606","article-title":"Scaling rectified flow transformers for high-resolution image synthesis","author":"Esser","year":"2024"},{"key":"10.1016\/j.patcog.2026.113495_bib0038","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume":"33","author":"Ho","year":"2020","journal-title":"NeurIPS"},{"key":"10.1016\/j.patcog.2026.113495_bib0039","series-title":"CVPR","first-page":"7291","article-title":"Realtime multi-person 2D pose estimation using part affinity fields","author":"Cao","year":"2017"},{"key":"10.1016\/j.patcog.2026.113495_bib0040","series-title":"ICCV Workshops","first-page":"4212","article-title":"Effective whole-body pose estimation with two-stages distillation","author":"Yang","year":"2023"},{"key":"10.1016\/j.patcog.2026.113495_bib0041","series-title":"AAAI","first-page":"9942","article-title":"DSRC: learning density-insensitive and semantic-aware collaborative representation against corruptions","volume":"39","author":"Zhang","year":"2025"},{"key":"10.1016\/j.patcog.2026.113495_bib0042","unstructured":"B. Peng, J. Wang, Y. Zhang, W. Li, M.-C. Yang, J. Jia, ControlNeXt: powerful and Efficient Control for Image and Video Generation, (2024). arXiv: 2408.06070."},{"key":"10.1016\/j.patcog.2026.113495_bib0043","first-page":"118654","article-title":"TPC: test-time procrustes calibration for diffusion-based human image animation","volume":"37","author":"Yoon","year":"2024","journal-title":"NeurIPS"},{"key":"10.1016\/j.patcog.2026.113495_bib0044","series-title":"CVPR","first-page":"12391","article-title":"HumanDreamer: generating controllable human-motion videos via decoupled generation","author":"Wang","year":"2025"},{"key":"10.1016\/j.patcog.2026.113495_bib0045","unstructured":"H. Wang, H. Tang, D. Di, Z. Zhang, W. Zuo, F. Gao, S. Ma, S. Zhang, MoSA: motion-coherent human video generation via structure-appearance decoupling, (2025b). arXiv: 2508.17404."}],"container-title":["Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326004619?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326004619?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T07:47:13Z","timestamp":1776152833000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0031320326004619"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,11]]},"references-count":45,"alternative-id":["S0031320326004619"],"URL":"https:\/\/doi.org\/10.1016\/j.patcog.2026.113495","relation":{},"ISSN":["0031-3203"],"issn-type":[{"value":"0031-3203","type":"print"}],"subject":[],"published":{"date-parts":[[2026,11]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Multimodal human video generation with uncertainty-aware pose guidance","name":"articletitle","label":"Article Title"},{"value":"Pattern Recognition","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.patcog.2026.113495","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"113495"}}