{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,28]],"date-time":"2026-05-28T06:01:43Z","timestamp":1779948103055,"version":"3.53.1"},"reference-count":93,"publisher":"IEEE","license":[{"start":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T00:00:00Z","timestamp":1773964800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T00:00:00Z","timestamp":1773964800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key RD Program of China","doi-asserted-by":"publisher","award":["2022YFF0902200"],"award-info":[{"award-number":["2022YFF0902200"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026,3,20]]},"DOI":"10.1109\/3dv69130.2026.00142","type":"proceedings-article","created":{"date-parts":[[2026,5,27]],"date-time":"2026-05-27T19:40:49Z","timestamp":1779910849000},"page":"1-12","source":"Crossref","is-referenced-by-count":0,"title":["DevilSight: Augmenting Monocular Human Avatar Reconstruction Through a Virtual Perspective"],"prefix":"10.1109","author":[{"given":"Yushuo","family":"Chen","sequence":"first","affiliation":[{"name":"Tsinghua University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ruizhi","family":"Shao","sequence":"additional","affiliation":[{"name":"Tsinghua University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Youxin","family":"Pang","sequence":"additional","affiliation":[{"name":"Tsinghua University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Hongwen","family":"Zhang","sequence":"additional","affiliation":[{"name":"Beijing Normal University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xinyi","family":"Wu","sequence":"additional","affiliation":[{"name":"Honor Device Co., Ltd"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Rihui","family":"Wu","sequence":"additional","affiliation":[{"name":"Honor Device Co., Ltd"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yebin","family":"Liu","sequence":"additional","affiliation":[{"name":"Tsinghua University"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/3610548.3618153"},{"key":"ref2","article-title":"Guide3d: Create 3d avatars from text and image guidance","author":"Cao","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00097"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00698"},{"key":"ref5","article-title":"Generalizable human gaussians from single-view image","author":"Chen","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73113-6_15"},{"key":"ref7","article-title":"Scaling rectified flow transformers for high-resolution image synthesis","volume-title":"Forty-first international conference on machine learning","author":"Esser"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00846"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01358"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01236"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72946-1_19"},{"key":"ref12","article-title":"Gans trained by a two time-scale update rule converge to a local nash equilibrium","volume":"30","author":"Heusel","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00058"},{"key":"ref14","first-page":"5646","article-title":"Expressive gaussian human avatars from monocular rgb video","volume":"37","author":"Hu","year":"2025","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref15","first-page":"8153","article-title":"Animate anyone: Consistent and controllable image-to-video synthesis for character animation","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Hu"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00067"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01930"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00437"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.52202\/075280-0202"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/3DV62453.2024.00152"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00552"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01322"},{"key":"ref23","article-title":"Mvhuman: tailoring 2d diffusion with multi-view sampling for realistic 3d human generation","author":"Jiang","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01623"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19824-3_24"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/3592433"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00055"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.52202\/075280-0462"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00107"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01879"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19824-3_25"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/3588432.3591490"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01864"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.52202\/079017-3519"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/3DV62453.2024.00150"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1145\/3478513.3480528"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/iccv51070.2023.00853"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00635"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00951"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/2816795.2818013"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1145\/3618368"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72940-9_2"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.52202\/079017-2367"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-025-02615-z"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00117"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01123"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00387"},{"key":"ref48","article-title":"Selfnerf: Fast training nerf for human from monocular self-rotating video","author":"Peng","year":"2022","journal-title":"arXiv preprint arXiv"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/iccv48922.2021.01405"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00894"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1016\/j.gmod.2025.101266"},{"key":"ref52","article-title":"Dreamfusion: Text-to-3d using 2d diffusion","author":"Poole","year":"2022","journal-title":"arXiv preprint arXiv"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00480"},{"key":"ref54","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"International conference on machine learning","author":"Radford"},{"key":"ref55","article-title":"Sam 2: Segment anything in images and videos","author":"Ravi","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.52202\/079017-1810"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1145\/3687980"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00159"},{"key":"ref61","first-page":"12278","article-title":"A-nerf: Articulated neural radiance fields for learning human shape, appearance, and pose","volume":"34","author":"Su","year":"2021","journal-title":"Advances in neural information processing systems"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20086-1_7"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73232-4_25"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72943-0_22"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19824-3_1"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00891"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.52202\/075280-0334"},{"key":"ref68","article-title":"Emu3: Next-token prediction is all you need","author":"Wang","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/ISMAR62088.2024.00058"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73247-8_27"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2003.819861"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.52202\/075280-0368"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2025.3618268"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00201"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01573"},{"key":"ref76","article-title":"Show-o: One single transformer to unify multimodal understanding and generation","author":"Xie","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref77","article-title":"Sv4d: Dynamic 3d content generation with multi-frame and multi-view consistency","author":"Xie","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-92591-7_25"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00147"},{"key":"ref80","article-title":"Diffusion 2: Dynamic 3d content generation via score composition of video and multi-view diffusion models","author":"Yang","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref81","article-title":"Ipadapter: Text compatible image prompt adapter for text-toimage diffusion models","author":"Ye","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01625"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01839"},{"key":"ref84","article-title":"Avatarbooth: High-quality and customizable 3d human avatar generation","author":"Zeng","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51701.2025.01162"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i7.28540"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.52202\/079017-0488"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00068"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01543"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1145\/3592101"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73001-6_9"},{"key":"ref93","article-title":"Drivable 3d gaussian avatars","author":"Zielonka","year":"2023","journal-title":"arXiv preprint arXiv"}],"event":{"name":"2026 International Conference on 3D Vision (3DV)","location":"Vancouver, BC, Canada","start":{"date-parts":[[2026,3,20]]},"end":{"date-parts":[[2026,3,23]]}},"container-title":["2026 International Conference on 3D Vision (3DV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11533157\/11533158\/11533213.pdf?arnumber=11533213","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,28]],"date-time":"2026-05-28T05:01:40Z","timestamp":1779944500000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11533213\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,20]]},"references-count":93,"URL":"https:\/\/doi.org\/10.1109\/3dv69130.2026.00142","relation":{},"subject":[],"published":{"date-parts":[[2026,3,20]]}}}