{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T20:55:17Z","timestamp":1775163317485,"version":"3.50.1"},"reference-count":71,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62372491"],"award-info":[{"award-number":["62372491"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100021171","name":"Basic and Applied Basic Research Foundation of Guangdong Province","doi-asserted-by":"publisher","award":["2023B1515120087"],"award-info":[{"award-number":["2023B1515120087"]}],"id":[{"id":"10.13039\/501100021171","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012245","name":"Science and Technology Planning Project of Key Laboratory of Advanced IntelliSense Technology, Guangdong Science and Technology Department","doi-asserted-by":"publisher","award":["2023B1212060024"],"award-info":[{"award-number":["2023B1212060024"]}],"id":[{"id":"10.13039\/501100012245","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Special Financial Grant from China Postdoctoral Science Foundation","award":["2025T180433"],"award-info":[{"award-number":["2025T180433"]}]},{"DOI":"10.13039\/501100021171","name":"Basic and Applied Basic Research Foundation of Guangdong Province","doi-asserted-by":"publisher","award":["2026A1515012484"],"award-info":[{"award-number":["2026A1515012484"]}],"id":[{"id":"10.13039\/501100021171","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012130","name":"Chinese Aeronautical Establishment","doi-asserted-by":"publisher","award":["2025L0150M1001"],"award-info":[{"award-number":["2025L0150M1001"]}],"id":[{"id":"10.13039\/501100012130","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62501502"],"award-info":[{"award-number":["62501502"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. on Image Process."],"published-print":{"date-parts":[[2026]]},"DOI":"10.1109\/tip.2026.3676289","type":"journal-article","created":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T19:52:55Z","timestamp":1774554775000},"page":"3354-3368","source":"Crossref","is-referenced-by-count":0,"title":["POSITION: Open World 3D Scene CAD Recomposition"],"prefix":"10.1109","volume":"35","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-0010-0963","authenticated-orcid":false,"given":"Rongkun","family":"Yang","sequence":"first","affiliation":[{"name":"School of Electronics and Communication Engineering, Sun Yat-sen University, Shenzhen Campus, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-5667-9120","authenticated-orcid":false,"given":"Hongda","family":"Liu","sequence":"additional","affiliation":[{"name":"School of Electronics and Communication Engineering, Sun Yat-sen University, Shenzhen Campus, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yijun","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Electronics and Communication Engineering, Sun Yat-sen University, Shenzhen Campus, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6896-1869","authenticated-orcid":false,"given":"Sheng","family":"Ao","sequence":"additional","affiliation":[{"name":"School of Informatics, Xiamen University, Xiamen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6178-6532","authenticated-orcid":false,"given":"Yongjian","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Electronics and Communication Engineering, Sun Yat-sen University, Shenzhen Campus, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0429-0263","authenticated-orcid":false,"given":"Longguang","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Electronics and Communication Engineering, Sun Yat-sen University, Shenzhen Campus, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kaiwen","family":"Xue","sequence":"additional","affiliation":[{"name":"Huawei Cloud Computing Technologies Company Ltd., Guizhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shunbo","family":"Zhou","sequence":"additional","affiliation":[{"name":"Huawei Cloud Computing Technologies Company Ltd., Guizhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0952-476X","authenticated-orcid":false,"given":"Yulan","family":"Guo","sequence":"additional","affiliation":[{"name":"School of Electronics and Communication Engineering, Sun Yat-sen University, Shenzhen Campus, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2024.3459800"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01868"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2023.3263110"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2024.3372449"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.3005434"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2024.3421952"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2023.3240834"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3180341"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2023.3279661"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2024.3409052"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2025.3567828"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00127"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2025.3573524"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01158"},{"key":"ref15","article-title":"Automated creation of digital cousins for robust policy learning","author":"Dai","year":"2024","journal-title":"arXiv:2410.07408"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/3618339"},{"key":"ref17","first-page":"80","article-title":"BEHAVIOR-1K: A benchmark for embodied ai with 1,000 everyday activities and realistic simulation","volume-title":"Proc. Conf. Robot Learn. (CoRL)","author":"Li"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561157"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561546"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58542-6_36"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/WACV56688.2023.00317"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72633-0_4"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58580-8_16"},{"key":"ref24","article-title":"SPARC: Sparse render-and-compare for CAD model alignment in a single RGB image","volume-title":"Proc. Brit. Mach. Vis. Conf. (BMVC)","author":"L\u00e4nger"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00399"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01263"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.52202\/075280-1554"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2025.3605660"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2024.3485518"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2025.3544142"},{"key":"ref31","article-title":"GRUtopia: Dream general robots in a city at scale","author":"Wang","year":"2024","journal-title":"arXiv:2407.10943"},{"key":"ref32","article-title":"Hunyuan3D 2.0: Scaling diffusion models for high resolution textured 3D assets generation","author":"Zhao","year":"2025","journal-title":"arXiv:2501.12202"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00272"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01847"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/3658236"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00264"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/3DV62453.2024.00066"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3193925"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"ref40","article-title":"Grounded SAM: Assembling open-world models for diverse visual tasks","author":"Ren","year":"2024","journal-title":"arXiv:2401.14159"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2025.3551648"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.52202\/079017-0688"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.52202\/075280-1516"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72970-6_3"},{"key":"ref45","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"139","author":"Radford"},{"key":"ref46","article-title":"GPT-4o system card","author":"Hurst","year":"2024","journal-title":"arXiv:2410.21276"},{"key":"ref47","article-title":"SAM 2: Segment anything in images and videos","author":"Ravi","year":"2024","journal-title":"arXiv:2408.00714"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10610243"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.52202\/075280-2989"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00385"},{"key":"ref51","article-title":"Open-YOLO 3D: Towards fast and accurate open-vocabulary 3D instance segmentation","author":"El Amine Boudjoghra","year":"2024","journal-title":"arXiv:2406.02548"},{"key":"ref52","first-page":"1610","article-title":"OVIR-3D: Open-vocabulary 3D instance retrieval without training on 3D data","volume-title":"Proc. Conf. Robot Learn. (CoRL)","author":"Lu"},{"key":"ref53","article-title":"EmbodiedSAM: Online segment any 3D thing in real time","author":"Xu","year":"2024","journal-title":"arXiv:2408.11811"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW63382.2024.00179"},{"key":"ref55","article-title":"Duoduo CLIP: Efficient 3D understanding with multi-view images","volume-title":"Proc. Int. Conf. Learn. Represent. (ICLR)","author":"Lee"},{"key":"ref56","article-title":"DINOv2: Learning robust visual features without supervision","author":"Oquab","year":"2023","journal-title":"arXiv:2304.07193"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.264"},{"key":"ref58","article-title":"Accelerating 3D deep learning with PyTorch3D","author":"Ravi","year":"2020","journal-title":"arXiv:2007.08501"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01670-0"},{"key":"ref60","article-title":"The replica dataset: A digital replica of indoor spaces","author":"Straub","year":"2019","journal-title":"arXiv:1906.05797"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19827-4_8"},{"key":"ref62","article-title":"ShapeNet: An information-rich 3D model repository","author":"Chang","year":"2015","journal-title":"arXiv:1512.03012"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.02045"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-021-01534-z"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP49359.2023.10222644"},{"key":"ref66","first-page":"4651","article-title":"Perceiver: General perception with iterative attention","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Jaegle"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00008"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3179507"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160590"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01539"},{"key":"ref71","first-page":"44164","article-title":"SCCREAM: Scan, register, render and map: A framework for annotating accurate and dense 3D indoor scenes with a benchmark","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","volume":"37","author":"Jung"}],"container-title":["IEEE Transactions on Image Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/83\/11355710\/11456836.pdf?arnumber=11456836","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T19:53:27Z","timestamp":1775159607000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11456836\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"references-count":71,"URL":"https:\/\/doi.org\/10.1109\/tip.2026.3676289","relation":{},"ISSN":["1057-7149","1941-0042"],"issn-type":[{"value":"1057-7149","type":"print"},{"value":"1941-0042","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]}}}