{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,25]],"date-time":"2025-11-25T06:58:55Z","timestamp":1764053935768,"version":"3.40.3"},"publisher-location":"Cham","reference-count":78,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031726453"},{"type":"electronic","value":"9783031726460"}],"license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72646-0_6","type":"book-chapter","created":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T08:45:29Z","timestamp":1730105129000},"page":"96-113","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Zero-Shot Multi-object Scene Completion"],"prefix":"10.1007","author":[{"given":"Shun","family":"Iwase","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Katherine","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vitor","family":"Guizilini","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Adrien","family":"Gaidon","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kris","family":"Kitani","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rare\u015f","family":"Ambru\u015f","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sergey","family":"Zakharov","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"6_CR1","doi-asserted-by":"crossref","unstructured":"Boulch, A., Marlet, R.: POCO: point Convolution for Surface Reconstruction. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.00620"},{"key":"6_CR2","unstructured":"Bozic, A., Palafox, P., Thies, J., Dai, A., Nie\u00dfner, M.: TransformerFusion: monocular RGB scene reconstruction using transformers. In: NeurIPS (2021)"},{"key":"6_CR3","doi-asserted-by":"crossref","unstructured":"Chan, E.R., et al.: GeNVS: generative novel view synthesis with 3D-aware diffusion models. CoRR (2023)","DOI":"10.1109\/ICCV51070.2023.00389"},{"key":"6_CR4","doi-asserted-by":"publisher","unstructured":"Chen, HX., Huang, J., Mu, TJ., Hu, SM.: CIRCLE: convolutional implicit reconstruction and completion for large-scale indoor scene. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) Computer Vision, ECCV 2022. LNCS, vol. 13692, pp. 506\u2013522. Springer, Cham (2022).. https:\/\/doi.org\/10.1007\/978-3-031-19824-3_30","DOI":"10.1007\/978-3-031-19824-3_30"},{"key":"6_CR5","doi-asserted-by":"crossref","unstructured":"Cheng, Y.C., Lee, H.Y., Tulyakov, S., Schwing, A.G., Gui, L.Y.: SDFusion: multimodal 3D shape completion, reconstruction, and generation. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.00433"},{"key":"6_CR6","doi-asserted-by":"crossref","unstructured":"Choy, C., Gwak, J., Savarese, S.: 4D spatio-temporal ConvNets: Minkowski convolutional neural networks. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00319"},{"key":"6_CR7","unstructured":"Chu, X., Tian, Z., Zhang, B., Wang, X., Shen, C.: Conditional positional encodings for vision transformers. In: ICLR (2023)"},{"key":"6_CR8","unstructured":"Together Computer: RedPajama: an open dataset for training large language models (2023)"},{"key":"6_CR9","doi-asserted-by":"crossref","unstructured":"Dai, A., Diller, C., Nie\u00dfner, M.: SG-NN: sparse generative neural networks for self-supervised scene completion of RGB-D scans. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00093"},{"key":"6_CR10","doi-asserted-by":"crossref","unstructured":"Dai, A., Ritchie, D., Bokeloh, M., Reed, S., Sturm, J., Nie\u00dfner, M.: ScanComplete: large-scale scene completion and semantic segmentation for 3D scans. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00481"},{"key":"6_CR11","unstructured":"Dao, T.: FlashAttention-2: faster attention with better parallelism and work partitioning (2023)"},{"key":"6_CR12","doi-asserted-by":"crossref","unstructured":": Deitke, M., et al.: Objaverse: a universe of annotated 3D objects. In: CVPR (2022)","DOI":"10.1109\/CVPR52729.2023.01263"},{"key":"6_CR13","doi-asserted-by":"publisher","first-page":"4901","DOI":"10.21105\/joss.04901","volume":"8","author":"M Denninger","year":"2023","unstructured":"Denninger, M., et al.: BlenderProc2: a procedural pipeline for photorealistic rendering. J. Open Source Softw. 8, 4901 (2023)","journal-title":"J. Open Source Softw."},{"key":"6_CR14","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: NAACL (2019)"},{"key":"6_CR15","unstructured":"Dosovitskiy, A., et al.: An image is worth 16$$\\times $$16 words: transformers for image recognition at scale. In: ICLR (2021)"},{"key":"6_CR16","doi-asserted-by":"crossref","unstructured":"Downs, L., et al.: Google scanned objects: a high-quality dataset of 3D scanned household items. In: ICRA (2022)","DOI":"10.1109\/ICRA46639.2022.9811809"},{"key":"6_CR17","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"51","DOI":"10.1007\/978-3-030-58598-3_4","volume-title":"Computer Vision \u2013 ECCV 2020","author":"Y Duan","year":"2020","unstructured":"Duan, Y., Zhu, H., Wang, H., Yi, L., Nevatia, R., Guibas, L.J.: Curriculum DeepSDF. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12353, pp. 51\u201367. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58598-3_4"},{"key":"6_CR18","unstructured":"Dupont, E., Kim, H., Eslami, S.M.A., Rezende, D.J., Rosenbaum, D.: From data to functa: your data point is a function and you can treat it like one. In: ICML (2022)"},{"key":"6_CR19","unstructured":"Gao, P., Ma, T., Li, H., Dai, J., Qiao, Y.: ConvMAE: masked convolution meets masked autoencoders. In: NeurIPS (2022)"},{"key":"6_CR20","unstructured":"Goldblum, M., Finzi, M., Rowan, K., Wilson, A.G.: The no free lunch theorem, Kolmogorov complexity, and the role of inductive biases in machine learning. CoRR (2023)"},{"key":"6_CR21","doi-asserted-by":"crossref","unstructured":"Graham, B., Engelcke, M., van\u00a0der Maaten, L.: 3D semantic segmentation with submanifold sparse convolutional networks. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00961"},{"key":"6_CR22","doi-asserted-by":"crossref","unstructured":"He, K., Chen, X., Xie, S., Li, Y., Doll\u00e1r, P., Girshick, R.: Masked autoencoders are scalable vision learners. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"6_CR23","doi-asserted-by":"crossref","unstructured":"Hou, J., Dai, A., Nie\u00dfner, M.: RevealNet: seeing behind objects in RGB-D scans. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00217"},{"key":"6_CR24","doi-asserted-by":"crossref","unstructured":"Huang, J., Gojcic, Z., Atzmon, M., Litany, O., Fidler, S., Williams, F.: Neural kernel surface reconstruction. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.00425"},{"key":"6_CR25","doi-asserted-by":"publisher","unstructured":"Irshad, M.Z., Zakharov, S., Ambrus, R., Kollar, T., Kira, Z., Gaidon, A.: ShAPO: implicit representations for multi-object shape, appearance, and pose optimization. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) Computer Vision, ECCV 2022. LNCS, vol. 13662 (2022). Springer, Cham. https:\/\/doi.org\/10.1007\/978-3-031-20086-1_16","DOI":"10.1007\/978-3-031-20086-1_16"},{"key":"6_CR26","doi-asserted-by":"crossref","unstructured":"Kappler, D., et al.: Real-time perception meets reactive motion generation. RA-L (2018)","DOI":"10.1109\/LRA.2018.2795645"},{"key":"6_CR27","doi-asserted-by":"publisher","first-page":"846","DOI":"10.1177\/0278364911406761","volume":"30","author":"S Karaman","year":"2011","unstructured":"Karaman, S., Frazzoli, E.: Sampling-based algorithms for optimal motion planning. Int. J. Rob. Res. 30, 846\u2013894 (2011)","journal-title":"Int. J. Rob. Res."},{"key":"6_CR28","doi-asserted-by":"crossref","unstructured":"Kaskman, R., Zakharov, S., Shugurov, I., Ilic, S.: HomebrewedDB: RGB-D dataset for 6D pose estimation of 3D objects. In: ICCVW (2019)","DOI":"10.1109\/ICCVW.2019.00338"},{"key":"6_CR29","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. In: ICLR (2015)"},{"key":"6_CR30","unstructured":"Labb\u00e9, Y., et al.: MegaPose: 6D pose estimation of novel objects via render & compare. In: CoRL (2022)"},{"key":"6_CR31","doi-asserted-by":"crossref","unstructured":"Li, J., Han, K., Wang, P., Liu, Y., Yuan, X.: Anisotropic convolutional networks for 3D semantic scene completion. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00341"},{"key":"6_CR32","doi-asserted-by":"crossref","unstructured":"Li, J., et al.: RGBD based dimensional decomposition residual network for 3D semantic scene completion. In: CVPR, June 2019, pp. 7693\u20137702 (2019)","DOI":"10.1109\/CVPR.2019.00788"},{"key":"6_CR33","doi-asserted-by":"crossref","unstructured":"Li, L.H., et al.: Grounded language-image pre-training. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.01069"},{"key":"6_CR34","doi-asserted-by":"crossref","unstructured":"Li, Y., et al.: VoxFormer: sparse voxel transformer for camera-based 3D semantic scene completion. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.00877"},{"key":"6_CR35","doi-asserted-by":"crossref","unstructured":"Liang, F., et al.: Open-vocabulary semantic segmentation with mask-adapted CLIP. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.00682"},{"key":"6_CR36","doi-asserted-by":"crossref","unstructured":"Lin, Y., Tremblay, J., Tyree, S., Vela, P.A., Birchfield, S.: Multi-view fusion for multi-level robotic scene understanding. In: IROS (2021)","DOI":"10.1109\/IROS51168.2021.9635994"},{"key":"6_CR37","unstructured":"Liu, L., Gu, J., Lin, K.Z., Chua, T.S., Theobalt, C.: Neural sparse voxel fields. In: NeurIPS (2020)"},{"key":"6_CR38","unstructured":"Liu, M., et\u00a0al.: One-2-3-45: any single image to 3D mesh in 45 seconds without per-shape optimization. In: NeurIPS (2023)"},{"key":"6_CR39","doi-asserted-by":"crossref","unstructured":"Liu, R., Wu, R., Hoorick, B.V., Tokmakov, P., Zakharov, S., Vondrick, C.: Zero-1-to-3: zero-shot one image to 3D object. In: CVPR (2023)","DOI":"10.1109\/ICCV51070.2023.00853"},{"key":"6_CR40","unstructured":"Liu, Z., Feng, Y., Black, M.J., Nowrouzezahrai, D., Paull, L., Liu, W.: MeshDiffusion: score-based generative 3D mesh modeling. In: ICLR (2023)"},{"key":"6_CR41","doi-asserted-by":"crossref","unstructured":"Lorensen, W.E., Cline, H.E.: Marching cubes: a high resolution 3D surface construction algorithm. In: SIGGRAPH (1987)","DOI":"10.1145\/37401.37422"},{"key":"6_CR42","doi-asserted-by":"crossref","unstructured":"Mescheder, L., Oechsle, M., Niemeyer, M., Nowozin, S., Geiger, A.: Occupancy networks: learning 3D reconstruction in function space. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00459"},{"key":"6_CR43","doi-asserted-by":"crossref","unstructured":"Mittal, P., Cheng, Y.C., Singh, M., Tulsiani, S.: AutoSDF: shape priors for 3D completion, reconstruction and generation. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.00040"},{"key":"6_CR44","doi-asserted-by":"crossref","unstructured":"Mohammadi, S.S., et al.: 3DSGrasp: 3D shape-completion for robotic grasp. In: ICRA (2023)","DOI":"10.1109\/ICRA48891.2023.10160350"},{"key":"6_CR45","doi-asserted-by":"crossref","unstructured":"Museth, K.: VDB: high-resolution sparse volumes with dynamic topology (2013)","DOI":"10.1145\/2487228.2487235"},{"key":"6_CR46","doi-asserted-by":"crossref","unstructured":"Okumura, K., D\u00e9fago, X.: Quick multi-robot motion planning by combining sampling and search. In: IJCAI (2023)","DOI":"10.24963\/ijcai.2023\/29"},{"key":"6_CR47","doi-asserted-by":"crossref","unstructured":"Park, J.J., Florence, P., Straub, J., Newcombe, R., Lovegrove, S.: DeepSDF: learning continuous signed distance functions for shape representation. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00025"},{"key":"6_CR48","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"523","DOI":"10.1007\/978-3-030-58580-8_31","volume-title":"Computer Vision \u2013 ECCV 2020","author":"S Peng","year":"2020","unstructured":"Peng, S., Niemeyer, M., Mescheder, L., Pollefeys, M., Geiger, A.: Convolutional occupancy networks. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12348, pp. 523\u2013540. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58580-8_31"},{"key":"6_CR49","unstructured":"Rabe, M.N., Staats, C.: Self-attention does not need $$O(n^2)$$ memory (2021)"},{"key":"6_CR50","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: ICML (2021)"},{"key":"6_CR51","unstructured":"Radford, A., Narasimhan, K.: Improving language understanding by generative pre-training (2018)"},{"key":"6_CR52","doi-asserted-by":"crossref","unstructured":"Reizenstein, J., Shapovalov, R., Henzler, P., Sbordone, L., Labatut, P., Novotny, D.: Common objects in 3D: large-scale learning and evaluation of real-life 3D category reconstruction. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01072"},{"key":"6_CR53","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models (2021)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"6_CR54","unstructured":"Schuhmann, C., et\u00a0al.: LAION-5B: an open large-scale dataset for training next generation image-text models. In: NeurIPS (2022)"},{"key":"6_CR55","first-page":"2403","volume":"26","author":"T Shao","year":"2020","unstructured":"Shao, T., Yang, Y., Weng, Y., Hou, Q., Zhou, K.: H-CNN: spatial hashing based CNN for 3D shape analysis. TVCG 26, 2403\u20132416 (2020)","journal-title":"TVCG"},{"key":"6_CR56","unstructured":"Shen, T., Gao, J., Yin, K., Liu, M.Y., Fidler, S.: Deep marching tetrahedra: a hybrid representation for high-resolution 3D shape synthesis. In: NeurIPS (2021)"},{"key":"6_CR57","doi-asserted-by":"crossref","unstructured":"Shi, Z., Zhou, X., Qiu, X., Zhu, X.: Improving image captioning with better use of captions. CoRR (2020)","DOI":"10.18653\/v1\/2020.acl-main.664"},{"key":"6_CR58","doi-asserted-by":"crossref","unstructured":"Song, S., Yu, F., Zeng, A., Chang, A.X., Savva, M., Funkhouser, T.: Semantic scene completion from a single depth image. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.28"},{"key":"6_CR59","unstructured":"Su, J., Lu, Y., Pan, S., Wen, B., Liu, Y.: RoFormer: enhanced transformer with rotary position embedding. In: ICLR (2020)"},{"key":"6_CR60","doi-asserted-by":"crossref","unstructured":"Varley, J., DeChant, C., Richardson, A., Ruales, J., Allen, P.: Shape completion enabled robotic grasping. In: IROS (2017)","DOI":"10.1109\/IROS.2017.8206060"},{"key":"6_CR61","doi-asserted-by":"crossref","unstructured":"Wang, P.S.: OctFormer: octree-based transformers for 3D point clouds. In: SIGGRAPH (2023)","DOI":"10.1145\/3592131"},{"key":"6_CR62","doi-asserted-by":"crossref","unstructured":"Wang, P.S., Liu, Y., Guo, Y.X., Sun, C.Y., Tong, X.: O-CNN: octree-based convolutional neural networks for 3D shape analysis. In: SIGGRAPH (2017)","DOI":"10.1145\/3072959.3073608"},{"key":"6_CR63","doi-asserted-by":"crossref","unstructured":"Wang, P.S., Liu, Y., Tong, X.: Deep octree-based CNNs with output-guided skip connections for 3D shape and scene completion. In: CVPRW (2020)","DOI":"10.1109\/CVPRW50498.2020.00141"},{"key":"6_CR64","unstructured":"Watson, D., Chan, W., Martin-Brualla, R., Ho, J., Tagliasacchi, A., Norouzi, M.: Novel view synthesis with diffusion models. CoRR (2022)"},{"key":"6_CR65","doi-asserted-by":"crossref","unstructured":"Williams, F., et al.: Neural fields as learnable kernels for 3D reconstruction. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.01795"},{"key":"6_CR66","doi-asserted-by":"crossref","unstructured":"Wu, C.Y., Johnson, J., Malik, J., Feichtenhofer, C., Gkioxari, G.: Multiview compressive coding for 3D reconstruction. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.00875"},{"key":"6_CR67","unstructured":"Wu, X., Lao, Y., Jiang, L., Liu, X., Zhao, H.: Point transformer V2: grouped vector attention and partition-based pooling. In: NeurIPS (2022)"},{"key":"6_CR68","doi-asserted-by":"crossref","unstructured":"Xiang, Y., Schmidt, T., Narayanan, V., Fox, D.: PoseCNN: a convolutional neural network for 6D object pose estimation in cluttered scenes (2018)","DOI":"10.15607\/RSS.2018.XIV.019"},{"key":"6_CR69","doi-asserted-by":"crossref","unstructured":"Xie, S., Girshick, R., Doll\u00e1r, P., Tu, Z., He, K.: Aggregated residual transformations for deep neural networks. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.634"},{"key":"6_CR70","doi-asserted-by":"crossref","unstructured":"Xu, J., Liu, S., Vahdat, A., Byeon, W., Wang, X., De\u00a0Mello, S.: ODISE: open-vocabulary panoptic segmentation with text-to-image diffusion models. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.00289"},{"key":"6_CR71","doi-asserted-by":"crossref","unstructured":"Yan, X., Lin, L., Mitra, N.J., Lischinski, D., Cohen-Or, D., Huang, H.: ShapeFormer: transformer-based shape completion via sparse representation. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.00614"},{"key":"6_CR72","doi-asserted-by":"crossref","unstructured":"Yu, X., Rao, Y., Wang, Z., Liu, Z., Lu, J., Zhou, J.: PoinTr: diverse point cloud completion with geometry-aware transformers. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01227"},{"key":"6_CR73","doi-asserted-by":"crossref","unstructured":"Zhai, X., Kolesnikov, A., Houlsby, N., Beyer, L.: Scaling vision transformers. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.01179"},{"key":"6_CR74","unstructured":"Zhang, D., Choi, C., Park, I., Kim, Y.M.: Probabilistic implicit scene completion. In: ICLR (2022)"},{"key":"6_CR75","unstructured":"Zhang, H., et al.: GLIPv2: unifying localization and vision-language understanding. CoRR (2022)"},{"key":"6_CR76","doi-asserted-by":"crossref","unstructured":"Zhang, P., Liu, W., Lei, Y., Lu, H., Yang, X.: Cascaded context pyramid for full-resolution 3D semantic scene completion. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00789"},{"key":"6_CR77","doi-asserted-by":"crossref","unstructured":"Zhao, H., Jiang, L., Jia, J., Torr, P.H., Koltun, V.: Point transformer. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01595"},{"key":"6_CR78","doi-asserted-by":"crossref","unstructured":"Zhu, Y., Tian, Y., Mexatas, D., Doll\u00e1r, P.: Semantic amodal segmentation. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.320"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72646-0_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T08:47:16Z","timestamp":1730105236000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72646-0_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"ISBN":["9783031726453","9783031726460"],"references-count":78,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72646-0_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"28 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}