{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,28]],"date-time":"2026-05-28T06:01:48Z","timestamp":1779948108247,"version":"3.53.1"},"reference-count":73,"publisher":"IEEE","license":[{"start":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T00:00:00Z","timestamp":1773964800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T00:00:00Z","timestamp":1773964800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100005950","name":"HKUST","doi-asserted-by":"publisher","award":["24251090T019"],"award-info":[{"award-number":["24251090T019"]}],"id":[{"id":"10.13039\/501100005950","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62302481"],"award-info":[{"award-number":["62302481"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026,3,20]]},"DOI":"10.1109\/3dv69130.2026.00096","type":"proceedings-article","created":{"date-parts":[[2026,5,27]],"date-time":"2026-05-27T19:40:49Z","timestamp":1779910849000},"page":"956-966","source":"Crossref","is-referenced-by-count":0,"title":["CTR3D: Cross-View Token Reduction for Dense Multi-View Generation"],"prefix":"10.1109","author":[{"given":"Kunming","family":"Luo","sequence":"first","affiliation":[{"name":"Hong Kong University of Science and Technology"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Hongyu","family":"Yan","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yuan","family":"Liu","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zihao","family":"Zhang","sequence":"additional","affiliation":[{"name":"Institute of Al for Industries CAS"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Manyuan","family":"Zhang","sequence":"additional","affiliation":[{"name":"The Chinese University of Hong Kong"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Wenping","family":"Wang","sequence":"additional","affiliation":[{"name":"Texas A&#x0026;M University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ping","family":"Tan","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Stable video diffusion: Scaling latent video diffusion models to large datasets","author":"Blattmann","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW59228.2023.00484"},{"key":"ref3","article-title":"Token merging: Your vit but faster","author":"Bolya","year":"2023","journal-title":"ICLR"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00600"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73247-8_20"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.02033"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2025.3581312"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01263"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9811809"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01999"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20083-0_24"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01781"},{"key":"ref13","author":"Gu","year":"2023","journal-title":"Nerfdiff: Single-image view synthesis with nerf-guided distillation from 3d-aware diffusion"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW60793.2023.00085"},{"key":"ref15","article-title":"Lrm: Large reconstruction model for single image to 3d","author":"Hong","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00926"},{"key":"ref17","article-title":"Make-a-shape: a ten-million-scale 3d shape model","volume-title":"Forty-first International Conference on Machine Learning","author":"Hui","year":"2024"},{"key":"ref18","article-title":"Hunyuan3d-omni: A unified framework for controllable generation of 3d assets","author":"Zhang","year":"2025","journal-title":"arXiv preprint arXiv"},{"key":"ref19","article-title":"Perceiver io: A general architecture for structured inputs & outputs","author":"Jaegle","journal-title":"arxiv 2021. arXiv preprint arXiv"},{"key":"ref20","first-page":"4651","article-title":"Perceiver: General perception with iterative attention","volume-title":"International conference on machine learning","author":"Jaegle","year":"2021"},{"key":"ref21","article-title":"Shap-e: Generating conditional 3d implicit functions","author":"Jun","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref22","article-title":"Spad: Spatially aware multiview diffusers","author":"Kant","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00141"},{"key":"ref24","article-title":"Spvit: Enabling faster vision transformers via soft token pruning","author":"Kong","year":"2021","journal-title":"arxiv"},{"key":"ref25","first-page":"3744","article-title":"Set transformer: A frame-work for attention-based permutation-invariant neural networks","volume-title":"International conference on machine learning","author":"Lee","year":"2019"},{"key":"ref26","article-title":"Instant3d: Fast text-to-3d with sparse-view generation and large reconstruction model","author":"Li","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref27","article-title":"Era3d: High-resolution multiview diffusion using efficient row-wise attention","author":"Li","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref28","article-title":"Sweetdreamer: Aligning geometric priors in 2d diffusion for consistent text-to-3d","author":"Li","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref29","article-title":"Craftsman: High-fidelity mesh generation with 3d native generation and interactive geometry refiner","author":"Li","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref30","article-title":"Gaussiandiffusion: 3d gaussian splatting for denoising diffusion probabilistic models with structured noise","author":"Li","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00715"},{"key":"ref32","article-title":"Unitex: Universal high fidelity generative texturing for 3d shapes","author":"Liang","year":"2025","journal-title":"arXiv preprint arXiv"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00037"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00037"},{"key":"ref35","author":"Liu","year":"2023","journal-title":"One-2\u20133-45: Any single image to 3 d mesh in 45 seconds without pershape optimization"},{"key":"ref36","article-title":"One-2\u20133-45: Any single image to 3d mesh in 45 seconds without per-shape optimization","author":"Liu","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00853"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00853"},{"key":"ref39","article-title":"Syncdreamer: Generating multiview-consistent images from a single-view image","author":"Liu","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72652-1_5"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00996"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52733.2024.00951"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00816"},{"key":"ref44","article-title":"Dreamfusion: Text-to-3d using 2d diffusion","author":"Poole","year":"2022","journal-title":"arXiv preprint arXiv"},{"key":"ref45","article-title":"Magic123: One image to high-quality 3d object generation using both 2d and 3d diffusion priors","author":"Qian","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref46","article-title":"Dynamicvit: Efficient vision transformers with dynamic token sparsification","author":"Rao","year":"2021","journal-title":"NeurIPS"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52688.2022.01042"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"ref49","first-page":"12786","article-title":"Tokenlearner: Adaptive space-time tokenization for videos","volume":"34","author":"Ryoo","year":"2021","journal-title":"Advances in neural information processing systems"},{"key":"ref50","article-title":"Zeronvs: Zero-shot 360-degree view synthesis from a single real image","author":"Sargent","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref51","article-title":"Zero123++: a single image to consistent multi-view diffusion base model","author":"Shi","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref52","article-title":"Mvdream: Multi-view diffusion for 3d generation","author":"Shi","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref53","article-title":"Denoising diffusion implicit models","author":"Song","year":"2020","journal-title":"arXiv preprint arXiv"},{"key":"ref54","article-title":"Dreamgaussian: Generative gaussian splatting for efficient 3d content creation","author":"Tang","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref55","article-title":"Mvdiffusion: Enabling holistic multiview image generation with correspondence-aware diffusion","author":"Tang","year":"2023","journal-title":"arXiv preprint"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72640-8_10"},{"key":"ref57","first-page":"175","article-title":"Mvdiffusion++: A dense highresolution multi-view diffusion model for single or sparseview 3d object reconstruction","volume-title":"European Conference on Computer Vision","author":"Tang","year":"2025"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73232-4_25"},{"key":"ref59","article-title":"Neus: Learning neural implicit surfaces by volume rendering for multi-view reconstruction","author":"Wang","year":"2021","journal-title":"arXiv preprint arXiv"},{"key":"ref60","article-title":"Pf-lrm: Pose-free large reconstruction model for joint pose and shape prediction","author":"Wang","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref61","article-title":"Prolificdreamer: High-fidelity and diverse text-to-3d generation with variational score distillation","author":"Wang","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref62","article-title":"Direct3d: Scalable image-to-3d generation via 3d latent diffusion transformer","author":"Wu","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52734.2025.02000"},{"key":"ref64","article-title":"Instantmesh: Efficient 3d mesh generation from a single image with sparse-view large reconstruction models","author":"Xu","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref65","article-title":"Dmv3d: Denoising multi-view diffusion using 3d large reconstruction model","author":"Xu","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref66","article-title":"Posemaster: Generating 3d characters in arbitrary poses from a single image","author":"Yan","year":"2025","journal-title":"arXiv preprint arXiv"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681634"},{"key":"ref68","article-title":"Hunyuan3d 1.0: A unified framework for text-to-3d and image-to-3d generation","author":"Yang","year":"2024","journal-title":"arXiv preprint arXiv"},{"key":"ref69","article-title":"Gaussiandreamer: Fast generation from text to 3d gaussian splatting with point cloud priors","author":"Yi","year":"2023","journal-title":"arXiv preprint arXiv"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01054"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1145\/3658146"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00068"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20083-0_26"}],"event":{"name":"2026 International Conference on 3D Vision (3DV)","location":"Vancouver, BC, Canada","start":{"date-parts":[[2026,3,20]]},"end":{"date-parts":[[2026,3,23]]}},"container-title":["2026 International Conference on 3D Vision (3DV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11533157\/11533158\/11533281.pdf?arnumber=11533281","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,28]],"date-time":"2026-05-28T05:02:31Z","timestamp":1779944551000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11533281\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,20]]},"references-count":73,"URL":"https:\/\/doi.org\/10.1109\/3dv69130.2026.00096","relation":{},"subject":[],"published":{"date-parts":[[2026,3,20]]}}}