{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:55:48Z","timestamp":1781535348970,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T00:00:00Z","timestamp":1781481600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"National Natural Science Foundation of China","award":["62476069"],"award-info":[{"award-number":["62476069"]}]},{"name":"Science and Technology Innovation 2030 \u2013 \"New Generation Artificial Intelligence\" Major Project","award":["2021ZD0110901"],"award-info":[{"award-number":["2021ZD0110901"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6,16]]},"DOI":"10.1145\/3805622.3810792","type":"proceedings-article","created":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:42:57Z","timestamp":1781534577000},"page":"1822-1831","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Retrieval-Augmented Camera Control for Video Diffusion"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-4290-5858","authenticated-orcid":false,"given":"Lining","family":"Wang","sequence":"first","affiliation":[{"name":"Harbin Institute of Technology, Harbin, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3298-2574","authenticated-orcid":false,"given":"Hongxun","family":"Yao","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology, Harbin, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-2553-0436","authenticated-orcid":false,"given":"Jinyu","family":"Zhang","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology, Harbin, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,15]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00539"},{"key":"e_1_3_3_1_3_2","unstructured":"Andreas Blattmann Tim Dockhorn Sumith Kulal Daniel Mendelevitch Maciej Kilian Dominik Lorenz Yam Levi Zion English Vikram Voleti Adam Letts et\u00a0al. 2023. Stable video diffusion: Scaling latent video diffusion models to large datasets. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2311.15127 (2023)."},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00782"},{"key":"e_1_3_3_1_5_2","volume-title":"The Thirteenth International Conference on Learning Representations, ICLR 2025","author":"He Hao","unstructured":"Hao He, Yinghao Xu, Yuwei Guo, Gordon Wetzstein, Bo Dai, Hongsheng Li, and Ceyuan Yang. [n. d.]. CameraCtrl: Enabling Camera Control for Video Diffusion Models. In The Thirteenth International Conference on Learning Representations, ICLR 2025."},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00193"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00930"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"crossref","unstructured":"Bernhard Kerbl Georgios Kopanas Thomas Leimk\u00fchler and George Drettakis. 2023. 3D Gaussian Splatting for Real-Time Radiance Field Rendering. ACM Transactions on Graphics 42 4 (2023) 1\u201314.","DOI":"10.1145\/3592433"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"crossref","unstructured":"Arno Knapitsch Jaesik Park Qian-Yi Zhou and Vladlen Koltun. 2017. Tanks and temples: Benchmarking large-scale scene reconstruction. ACM Transactions on Graphics (ToG) 36 4 (2017) 1\u201313.","DOI":"10.1145\/3072959.3073599"},{"key":"e_1_3_3_1_10_2","unstructured":"Dogyoon Lee Donghyeong Kim Jungho Lee Minhyeok Lee Seunghoon Lee and Sangyoun Lee. 2025. Sparse-DeRF: Deblurred Neural Radiance Fields from Sparse View. IEEE Transactions on Pattern Analysis and Machine Intelligence (2025)."},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01963"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00081"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","unstructured":"Kunhao Liu Ling Shao and Shijian Lu. 2024. Novel View Extrapolation with Video Diffusion Priors. CoRR abs\/2411.14208 (2024). arXiv:https:\/\/arXiv.org\/abs\/2411.1420810.48550\/ARXIV.2411.14208","DOI":"10.48550\/ARXIV.2411.14208"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00960"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00853"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00194"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i5.28206"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.02014"},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"crossref","unstructured":"Ben Mildenhall Pratul\u00a0P Srinivasan Rodrigo Ortiz-Cayon Nima\u00a0Khademi Kalantari Ravi Ramamoorthi Ren Ng and Abhishek Kar. 2019. Local light field fusion: Practical view synthesis with prescriptive sampling guidelines. ACM Transactions on Graphics (ToG) 38 4 (2019) 1\u201314.","DOI":"10.1145\/3306346.3322980"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00540"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.02012"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.02092"},{"key":"e_1_3_3_1_23_2","unstructured":"Oriane Sim\u00e9oni Huy\u00a0V Vo Maximilian Seitzer Federico Baldassarre Maxime Oquab Cijo Jose Vasil Khalidov Marc Szafraniec Seungeun Yi Micha\u00ebl Ramamonjisoa et\u00a0al. 2025. Dinov3. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2508.10104 (2025)."},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"publisher","unstructured":"Chenxi Song Yanming Yang Tong Zhao Ruibo Li and Chi Zhang. 2025. WorldForge: Unlocking Emergent 3D\/4D Generation in Video Diffusion Model via Training-Free Guidance. CoRR abs\/2509.15130 (2025). arXiv:https:\/\/arXiv.org\/abs\/2509.1513010.48550\/ARXIV.2509.15130","DOI":"10.48550\/ARXIV.2509.15130"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","unstructured":"Bin Tan Nan Xue Tianfu Wu and Gui-Song Xia. 2023. NOPE-SAC: Neural One-Plane RANSAC for Sparse-View Planar 3D Reconstruction. IEEE Trans. Pattern Anal. Mach. Intell. 45 12 (2023) 15233\u201315248. 10.1109\/TPAMI.2023.3314745","DOI":"10.1109\/TPAMI.2023.3314745"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72640-8_10"},{"key":"e_1_3_3_1_27_2","unstructured":"Team Wan Ang Wang Baole Ai Bin Wen Chaojie Mao Chen-Wei Xie Di Chen Feiwu Yu Haiming Zhao Jianxiao Yang et\u00a0al. 2025. Wan: Open and advanced large-scale video generative models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2503.20314 (2025)."},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00075"},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00832"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00499"},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01956"},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","DOI":"10.1145\/3641519.3657518"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02036"},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00570"},{"key":"e_1_3_3_1_35_2","volume-title":"The Thirteenth International Conference on Learning Representations, ICLR 2025, Singapore, April 24-28, 2025","author":"Xiao Zeqi","unstructured":"Zeqi Xiao, Wenqi Ouyang, Yifan Zhou, Shuai Yang, Lei Yang, Jianlou Si, and Xingang Pan. [n. d.]. Trajectory attention for fine-grained video motion control. In The Thirteenth International Conference on Learning Representations, ICLR 2025, Singapore, April 24-28, 2025."},{"key":"e_1_3_3_1_36_2","unstructured":"Haolin Xiong Sairisheek Muttukuru Rishi Upadhyay Pradyumna Chari and Achuta Kadambi. 2023. Sparsegs: Real-time 360 sparse view synthesis using gaussian splatting. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2312.00206 (2023)."},{"key":"e_1_3_3_1_37_2","volume-title":"The Thirteenth International Conference on Learning Representations, ICLR 2025, Singapore, April 24-28, 2025","author":"Yang Zhuoyi","unstructured":"Zhuoyi Yang, Jiayan Teng, Wendi Zheng, Ming Ding, Shiyu Huang, Jiazheng Xu, Yuanming Yang, Wenyi Hong, Xiaohan Zhang, Guanyu Feng, Da Yin, Yuxuan Zhang, Weihan Wang, Yean Cheng, Bin Xu, Xiaotao Gu, Yuxiao Dong, and Jie Tang. [n. d.]. CogVideoX: Text-to-Video Diffusion Models with An Expert Transformer. In The Thirteenth International Conference on Learning Representations, ICLR 2025, Singapore, April 24-28, 2025."},{"key":"e_1_3_3_1_38_2","volume-title":"The Thirteenth International Conference on Learning Representations, ICLR 2025, Singapore, April 24-28, 2025","author":"You Meng","year":"2025","unstructured":"Meng You, Zhiyu Zhu, Hui Liu, and Junhui Hou. 2025. NVS-Solver: Video Diffusion Model as Zero-Shot Novel View Synthesizer. In The Thirteenth International Conference on Learning Representations, ICLR 2025, Singapore, April 24-28, 2025. https:\/\/openreview.net\/forum?id=zDJf7fvdid"},{"key":"e_1_3_3_1_39_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51701.2025.00017"},{"key":"e_1_3_3_1_40_2","unstructured":"Wangbo Yu Jinbo Xing Li Yuan Wenbo Hu Xiaoyu Li Zhipeng Huang Xiangjun Gao Tien-Tsin Wong Ying Shan and Yonghong Tian. 2025. Viewcrafter: Taming video diffusion models for high-fidelity novel view synthesis. IEEE Transactions on Pattern Analysis and Machine Intelligence (2025)."},{"key":"e_1_3_3_1_41_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00575"},{"key":"e_1_3_3_1_43_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i7.28626"}],"event":{"name":"ICMR '26: International Conference on Multimedia Retrieval","location":"Amsterdam The Netherlands","acronym":"ICMR '26","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2026 International Conference on Multimedia Retrieval"],"original-title":[],"deposited":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:43:07Z","timestamp":1781534587000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3805622.3810792"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6,15]]},"references-count":42,"alternative-id":["10.1145\/3805622.3810792","10.1145\/3805622"],"URL":"https:\/\/doi.org\/10.1145\/3805622.3810792","relation":{},"subject":[],"published":{"date-parts":[[2026,6,15]]},"assertion":[{"value":"2026-06-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}