{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T10:14:10Z","timestamp":1777889650403,"version":"3.51.4"},"reference-count":90,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,19]],"date-time":"2025-10-19T00:00:00Z","timestamp":1760832000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,19]]},"DOI":"10.1109\/iccv51701.2025.02537","type":"proceedings-article","created":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T19:45:49Z","timestamp":1777491949000},"page":"27326-27337","source":"Crossref","is-referenced-by-count":0,"title":["SteerX: Creating Any Camera-Free 3D and 4D Scenes with Geometric Steering"],"prefix":"10.1109","author":[{"given":"Byeongjun","family":"Park","sequence":"first","affiliation":[{"name":"KAIST"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hyojun","family":"Go","sequence":"additional","affiliation":[{"name":"EverEx"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hyelin","family":"Nam","sequence":"additional","affiliation":[{"name":"EverEx"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Byung-Hoon","family":"Kim","sequence":"additional","affiliation":[{"name":"EverEx"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hyungjin","family":"Chung","sequence":"additional","affiliation":[{"name":"EverEx"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Changick","family":"Kim","sequence":"additional","affiliation":[{"name":"KAIST"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00566"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52734.2025.02130"},{"key":"ref3","article-title":"Vd3d: Taming large video diffusion transformers for 3d camera control","author":"Bahmani","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00580"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01804"},{"key":"ref6","article-title":"Training diffusion models with reinforcement learning","volume-title":"The Twelfth International Conference on Learning Representations","author":"Black","year":"2024"},{"key":"ref7","article-title":"Stable video diffusion: Scaling latent video diffusion models to large datasets","author":"Blattmann","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01840"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3807511"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.02033"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72664-4_21"},{"key":"ref13","article-title":"Diffusion posterior sampling for general noisy inverse problems","author":"Chung","year":"2022","journal-title":"arXiv preprint arXiv"},{"key":"ref14","article-title":"Decomposed diffusion sampler for accelerating large-scale inverse problems","author":"Chung","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/iccv51070.2023.00296"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.261"},{"key":"ref17","article-title":"A survey on diffusion models for inverse problems","author":"Daras","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref18","first-page":"8780","article-title":"Diffusion models beat gans on image synthesis","volume":"34","author":"Dhariwal","year":"2021","journal-title":"Advances in neural information processing systems"},{"key":"ref19","article-title":"Diffusion posterior sampling for linear inverse problem solving: A filtering perspective","volume-title":"The Twelfth International Conference on Learning Representations","author":"Dou","year":"2024"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4757-3437-9_1"},{"issue":"496","key":"ref21","doi-asserted-by":"crossref","DOI":"10.1198\/jasa.2011.tm11181","article-title":"Tweedie\u2019s formula and selection b ias","volume":"106","author":"Efron","year":"2011","journal-title":"Journal of the American Statistical Association"},{"key":"ref22","article-title":"Featup: A modelagnostic framework for features at any resolution","author":"Fu","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref23","article-title":"Cat3d: Create anything in 3d with multi-view diffusion models","author":"Gao","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52734.2025.02005"},{"key":"ref25","article-title":"Ltx-video: Realtime video latent diffusion","author":"HaCohen","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.52202\/068431-2027"},{"key":"ref27","article-title":"Cameractrl: Enabling camera control for text-to-video generation","author":"He","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref28","volume-title":"T3 bench: Benchmarking current progress in text-to-3d generation","author":"He","year":"2023"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.595"},{"key":"ref30","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume":"33","author":"Ho","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.52202\/068431-0628"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02060"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/3592433"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"ref35","article-title":"Hunyuanvideo: A systematic framework for large video generative models","author":"Kong","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref36","article-title":"Derivative-free guidance in continuous and discrete diffusion models with soft valuebased decoding","author":"Li","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.52202\/079017-2391"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00643"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00037"},{"key":"ref40","first-page":"22160","article-title":"D13dv-10k: A large-scale scene dataset for deep learning-based 3d vision","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Ling","year":"2024"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01419"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/tip.2026.3666733"},{"key":"ref43","article-title":"Syncdreamer: Generating multiview-consistent images from a single-view image","author":"Liu","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref44","article-title":"Dpm-solver++: Fast solver for guided sampling of diffusion probabilistic models","author":"Lu","year":"2022","journal-title":"arXiv preprint arXiv"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01218"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1145\/3503250"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00314"},{"key":"ref48","article-title":"Denoising task routing for diffusion models","author":"Park","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref49","article-title":"Switch diffusion transformer: Synergizing denoising tasks with sparse mixture-ofexperts","author":"Park","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00581"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1145\/3478513.3480487"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00387"},{"key":"ref53","article-title":"Dreamfusion: Text-to-3d using 2d diffusion","author":"Poole","year":"2022","journal-title":"arXiv preprint arXiv"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01072"},{"key":"ref55","article-title":"A general framework for inference-time scaling and steering of diffusion models","author":"Singhal","year":"2025","journal-title":"arXiv preprint arXiv"},{"key":"ref56","article-title":"Splatt3r: Zero-shot gaussian splatting from uncalibrated image pairs","author":"Smart","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref57","article-title":"Pseudoinverse-guided diffusion models for inverse problems","volume-title":"International Conference on Learning Representations","author":"Song","year":"2023"},{"key":"ref58","article-title":"Score-based generative modeling through stochastic differential equations","author":"Song","journal-title":"arXiv preprint arXiv"},{"key":"ref59","article-title":"Dimensionx: Create any 3d and 4d scenes from a single image with controllable video diffusion","author":"Sun","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00972"},{"key":"ref61","article-title":"Mvdust3r+: Single-stage scene reconstruction from sparse views in 2 seconds","author":"Tang","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref62","article-title":"Genmo Team","volume-title":"Mochi 1","year":"2024"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1111\/cgf.14507"},{"key":"ref64","article-title":"Reward-guided iterative refinement in diffusion models at test-time with applications to protein and dna design","author":"Uehara","year":"2025","journal-title":"arXiv preprint arXiv"},{"key":"ref65","article-title":"Superpoint gaussian splatting for real-time high-fidelity dynamic scene reconstruction","author":"Wan","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01214"},{"key":"ref67","article-title":"Shape of motion: 4 d reconstruction from a single video","author":"Wang","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01956"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.52202\/079017-3409"},{"key":"ref70","article-title":"Prolificdreamer: High-fidelity and diverse text-to-3d generation with variational score distillation","author":"Wang","year":"2024","journal-title":"Advances in Neural Information Processing Systems, 36"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1145\/3641519.3657518"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01006"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01920"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.52202\/075280-1363"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52734.2025.02427"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1007\/s41095-024-0436-y"},{"key":"ref77","first-page":"15903","article-title":"Imagereward: learning and evaluating human preferences for text-to-image generation","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems","author":"Xu","year":"2023"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00987"},{"key":"ref79","article-title":"Learn to optimize denoising scores for 3d generation: A unified and improved diffusion prior on nerf and 3d gaussian splatting","author":"Yang","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref80","article-title":"Cogvideox: Text-to-video diffusion models with an expert transformer","author":"Yang","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref81","article-title":"No pose, no problem: Surprisingly simple 3d gaussian splats from sparse unposed images","author":"Ye","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00008"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2025.3613256"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00883"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01839"},{"key":"ref86","article-title":"Monst3r: A simple approach for estimating geometry in the presence of motion","author":"Zhang","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref87","article-title":"Genxd: Generating any 3d and 4d scenes","author":"Zhao","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1145\/3197517.3201323"},{"key":"ref89","article-title":"Hifa: Highfidelity text-to-3d generation with advanced diffusion guidance","author":"Zhu","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref90","first-page":"22232232","article-title":"Unpaired image-to-image translation using cycleconsistent adversarial networks","volume-title":"Proceedings of the IEEE international conference on computer vision","author":"Zhu","year":"2017"}],"event":{"name":"2025 IEEE\/CVF International Conference on Computer Vision (ICCV)","location":"Honolulu, HI, USA","start":{"date-parts":[[2025,10,19]]},"end":{"date-parts":[[2025,10,25]]}},"container-title":["2025 IEEE\/CVF International Conference on Computer Vision (ICCV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11443115\/11443287\/11445235.pdf?arnumber=11445235","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T05:18:30Z","timestamp":1777612710000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11445235\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,19]]},"references-count":90,"URL":"https:\/\/doi.org\/10.1109\/iccv51701.2025.02537","relation":{},"subject":[],"published":{"date-parts":[[2025,10,19]]}}}