{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T19:31:50Z","timestamp":1780515110901,"version":"3.54.1"},"publisher-location":"Cham","reference-count":71,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031726231","type":"print"},{"value":"9783031726248","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T00:00:00Z","timestamp":1729900800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T00:00:00Z","timestamp":1729900800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72624-8_4","type":"book-chapter","created":{"date-parts":[[2024,10,25]],"date-time":"2024-10-25T09:52:13Z","timestamp":1729849933000},"page":"55-72","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":72,"title":["OccWorld: Learning a\u00a03D Occupancy World Model for\u00a0Autonomous Driving"],"prefix":"10.1007","author":[{"given":"Wenzhao","family":"Zheng","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Weiliang","family":"Chen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yuanhui","family":"Huang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Borui","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yueqi","family":"Duan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jiwen","family":"Lu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,10,26]]},"reference":[{"key":"4_CR1","doi-asserted-by":"crossref","unstructured":"Berman, M., Triki, A.R., Blaschko, M.B.: The lov\u00e1sz-softmax loss: a tractable surrogate for the optimization of the intersection-over-union measure in neural networks. In: CVPR, pp. 4413\u20134421 (2018)","DOI":"10.1109\/CVPR.2018.00464"},{"key":"4_CR2","unstructured":"Brown, T., et al.: Language models are few-shot learners. In: NeurIPS, vol. 33, pp. 1877\u20131901 (2020)"},{"key":"4_CR3","doi-asserted-by":"crossref","unstructured":"Caesar, H., et al.: nuScenes: a multimodal dataset for autonomous driving. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"4_CR4","doi-asserted-by":"crossref","unstructured":"Cao, A.Q., de\u00a0Charette, R.: MonoScene: monocular 3D semantic scene completion. In: CVPR, pp. 3991\u20134001 (2022)","DOI":"10.1109\/CVPR52688.2022.00396"},{"key":"4_CR5","doi-asserted-by":"crossref","unstructured":"Cao, A.Q., de\u00a0Charette, R.: SceneRF: self-supervised monocular 3D scene reconstruction with radiance fields. In: ICCV, pp. 9387\u20139398 (2023)","DOI":"10.1109\/ICCV51070.2023.00861"},{"key":"4_CR6","doi-asserted-by":"crossref","unstructured":"Chen, X., Lin, K.Y., Qian, C., Zeng, G., Li, H.: 3D sketch-aware semantic scene completion via semi-supervised structure prior. In: CVPR, pp. 4193\u20134202 (2020)","DOI":"10.1109\/CVPR42600.2020.00425"},{"key":"4_CR7","doi-asserted-by":"crossref","unstructured":"Cheng, R., Razani, R., Taghavi, E., Li, E., Liu, B.: 2-S3Net: attentive feature fusion with adaptive feature selection for sparse semantic segmentation network. In: CVPR, pp. 12547\u201312556 (2021)","DOI":"10.1109\/CVPR46437.2021.01236"},{"key":"4_CR8","unstructured":"Dosovitskiy, A., et al.: An image is worth $$16\\times 16$$ words: transformers for image recognition at scale. In: ICLR (2020)"},{"key":"4_CR9","unstructured":"Gao, R., et al.: MagicDrive: street view generation with diverse 3D geometry control. arXiv preprint arXiv:2310.02601 (2023)"},{"key":"4_CR10","unstructured":"Goodfellow, I., et al.: Generative adversarial nets. In: NeurIPS, vol. 27 (2014)"},{"key":"4_CR11","doi-asserted-by":"crossref","unstructured":"Gu, J., et al.: ViP3D: end-to-end visual trajectory prediction via 3D agent queries. arXiv preprint arXiv:2208.01582 (2022)","DOI":"10.1109\/CVPR52729.2023.00532"},{"key":"4_CR12","unstructured":"Ha, D., Schmidhuber, J.: World models. arXiv preprint arXiv:1803.10122 (2018)"},{"key":"4_CR13","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: CVPR, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"4_CR14","doi-asserted-by":"crossref","unstructured":"Hu, A., et al.: FIERY: future instance prediction in bird\u2019s-eye view from surround monocular cameras. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01499"},{"key":"4_CR15","unstructured":"Hu, A., et al.: GAIA-1: a generative world model for autonomous driving. arXiv preprint arXiv:2309.17080 (2023)"},{"key":"4_CR16","doi-asserted-by":"crossref","unstructured":"Hu, P., Huang, A., Dolan, J., Held, D., Ramanan, D.: Safe local motion planning with self-supervised freespace forecasting. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.01254"},{"key":"4_CR17","doi-asserted-by":"publisher","first-page":"533","DOI":"10.1007\/978-3-031-19839-7_31","volume-title":"ECCV 2022","author":"S Hu","year":"2022","unstructured":"Hu, S., Chen, L., Wu, P., Li, H., Yan, J., Tao, D.: ST-P3: end-to-end vision-based autonomous driving via spatial-temporal feature learning. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13698, pp. 533\u2013549. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19839-7_31"},{"key":"4_CR18","doi-asserted-by":"crossref","unstructured":"Hu, Y., et al.: Planning-oriented autonomous driving. In: CVPR, pp. 17853\u201317862 (2023)","DOI":"10.1109\/CVPR52729.2023.01712"},{"key":"4_CR19","unstructured":"Huang, J., Huang, G., Zhu, Z., Du, D.: BEVDet: high-performance multi-camera 3D object detection in bird-eye-view. arXiv preprint arXiv:2112.11790 (2021)"},{"key":"4_CR20","doi-asserted-by":"crossref","unstructured":"Huang, Y., Zheng, W., Zhang, B., Zhou, J., Lu, J.: SelfOcc: self-supervised vision-based 3D occupancy prediction. In: CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.01885"},{"key":"4_CR21","doi-asserted-by":"crossref","unstructured":"Huang, Y., Zheng, W., Zhang, Y., Zhou, J., Lu, J.: Tri-perspective view for vision-based 3D semantic occupancy prediction. In: CVPR, pp. 9223\u20139232 (2023)","DOI":"10.1109\/CVPR52729.2023.00890"},{"key":"4_CR22","doi-asserted-by":"crossref","unstructured":"Huang, Y., Zheng, W., Zhang, Y., Zhou, J., Lu, J.: GaussianFormer: scene as gaussians for vision-based 3D semantic occupancy prediction. In: ECCV (2024)","DOI":"10.1109\/CVPR52729.2023.00890"},{"key":"4_CR23","doi-asserted-by":"crossref","unstructured":"Huang, Z., Liu, H., Lv, C.: GameFormer: game-theoretic modeling and learning of transformer-based interactive prediction and planning for autonomous driving. arXiv preprint arXiv:2303.05760 (2023)","DOI":"10.1109\/ICCV51070.2023.00361"},{"key":"4_CR24","doi-asserted-by":"crossref","unstructured":"Huang, Z., Liu, H., Wu, J., Lv, C.: Differentiable integrated motion prediction and planning with learnable cost function for autonomous driving. IEEE Trans. Neural Netw. Learn. Syst. (2023)","DOI":"10.1109\/TNNLS.2023.3283542"},{"key":"4_CR25","unstructured":"Jiang, B., et al.: Perceive, interact, predict: learning dynamic and static clues for end-to-end motion prediction. arXiv preprint arXiv:2212.02181 (2022)"},{"key":"4_CR26","doi-asserted-by":"crossref","unstructured":"Jiang, B., et al.: VAD: vectorized scene representation for efficient autonomous driving. arXiv preprint arXiv:2303.12077 (2023)","DOI":"10.1109\/ICCV51070.2023.00766"},{"key":"4_CR27","doi-asserted-by":"publisher","first-page":"353","DOI":"10.1007\/978-3-031-19839-7_21","volume-title":"ECCV 2022","author":"T Khurana","year":"2022","unstructured":"Khurana, T., Hu, P., Dave, A., Ziglar, J., Held, D., Ramanan, D.: Differentiable raycasting for self-supervised occupancy forecasting. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13698, pp. 353\u2013369. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19839-7_21"},{"key":"4_CR28","doi-asserted-by":"crossref","unstructured":"Khurana, T., Hu, P., Held, D., Ramanan, D.: Point cloud forecasting as a proxy for 4D occupancy forecasting. In: CVPR, pp. 1116\u20131124 (2023)","DOI":"10.1109\/CVPR52729.2023.00114"},{"key":"4_CR29","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114 (2013)"},{"key":"4_CR30","doi-asserted-by":"crossref","unstructured":"Li, J., Han, K., Wang, P., Liu, Y., Yuan, X.: Anisotropic convolutional networks for 3D semantic scene completion. In: CVPR, pp. 3351\u20133359 (2020)","DOI":"10.1109\/CVPR42600.2020.00341"},{"key":"4_CR31","doi-asserted-by":"crossref","unstructured":"Li, Q., Wang, Y., Wang, Y., Zhao, H.: HDMapNet: an online HD map construction and evaluation framework. In: ICRA (2022)","DOI":"10.1109\/ICRA46639.2022.9812383"},{"key":"4_CR32","doi-asserted-by":"crossref","unstructured":"Li, X., Zhang, Y., Ye, X.: DrivingDiffusion: layout-guided multi-view driving scene video generation with latent diffusion model. arXiv preprint arXiv:2310.07771 (2023)","DOI":"10.1007\/978-3-031-73229-4_27"},{"key":"4_CR33","doi-asserted-by":"crossref","unstructured":"Li, Y., et al.: BEVDepth: acquisition of reliable depth for multi-view 3D object detection. arXiv preprint arXiv:2206.10092 (2022)","DOI":"10.1609\/aaai.v37i2.25233"},{"key":"4_CR34","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/978-3-031-20077-9_1","volume-title":"ECCV 2022","author":"Z Li","year":"2022","unstructured":"Li, Z., et al.: BEVFormer: learning bird\u2019s-eye-view representation from multi-camera images via spatiotemporal transformers. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13669, pp. 1\u201318. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20077-9_1"},{"key":"4_CR35","unstructured":"Liao, B., et al.: MapTR: structured modeling and learning for online vectorized HD map construction. arXiv preprint arXiv:2208.14437 (2022)"},{"key":"4_CR36","unstructured":"Liong, V.E., Nguyen, T.N.T., Widjaja, S., Sharma, D., Chong, Z.J.: AMVNet: assertion-based multi-view fusion network for LiDAR semantic segmentation. arXiv preprint arXiv:2012.04934 (2020)"},{"key":"4_CR37","unstructured":"Liu, Y., Wang, Y., Wang, Y., Zhao, H.: VectorMapNet: end-to-end vectorized HD map learning. arXiv preprint arXiv:2206.08920 (2022)"},{"key":"4_CR38","doi-asserted-by":"crossref","unstructured":"Liu, Y., Zhang, J., Fang, L., Jiang, Q., Zhou, B.: Multimodal motion prediction with stacked transformers. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00749"},{"key":"4_CR39","unstructured":"Loshchilov, I., Hutter, F.: SGDR: stochastic gradient descent with warm restarts. arXiv preprint arXiv:1608.03983 (2016)"},{"key":"4_CR40","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)"},{"key":"4_CR41","unstructured":"Mersch, B., Chen, X., Behley, J., Stachniss, C.: Self-supervised point cloud prediction using 3D spatio-temporal convolutional networks. In: CoRL, pp. 1444\u20131454 (2022)"},{"key":"4_CR42","unstructured":"Van Den Oord, A., Vinyals, O., Kavukcuoglu, K.: Neural discrete representation learning. arXiv preprint arXiv:1711.00937 (2017)"},{"key":"4_CR43","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"194","DOI":"10.1007\/978-3-030-58568-6_12","volume-title":"Computer Vision \u2013 ECCV 2020","author":"J Philion","year":"2020","unstructured":"Philion, J., Fidler, S.: Lift, splat, shoot: encoding images from arbitrary camera rigs by implicitly unprojecting to 3D. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12359, pp. 194\u2013210. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58568-6_12"},{"key":"4_CR44","doi-asserted-by":"crossref","unstructured":"Ratliff, N.D., Bagnell, J.A., Zinkevich, M.A.: Maximum margin planning. In: Proceedings of the 23rd International Conference on Machine Learning, pp. 729\u2013736 (2006)","DOI":"10.1145\/1143844.1143936"},{"key":"4_CR45","doi-asserted-by":"crossref","unstructured":"Reading, C., Harakeh, A., Chae, J., Waslander, S.L.: Categorical depth distribution network for monocular 3D object detection. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00845"},{"key":"4_CR46","unstructured":"Renz, K., Chitta, K., Mercea, O.B., Koepke, A., Akata, Z., Geiger, A.: PlanT: explainable planning transformers via object-level representations. arXiv preprint arXiv:2210.14222 (2022)"},{"key":"4_CR47","doi-asserted-by":"crossref","unstructured":"Roldao, L., de\u00a0Charette, R., Verroust-Blondet, A.: LMSCNet: lightweight multiscale 3D semantic completion. In: 2020 International Conference on 3D Vision (3DV), pp. 111\u2013119 (2020)","DOI":"10.1109\/3DV50981.2020.00021"},{"key":"4_CR48","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: CVPR, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"4_CR49","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv abs\/1409.1556 (2014)"},{"issue":"4","key":"4_CR50","doi-asserted-by":"publisher","first-page":"160","DOI":"10.1145\/122344.122377","volume":"2","author":"RS Sutton","year":"1991","unstructured":"Sutton, R.S.: Dyna, an integrated architecture for learning, planning, and reacting. ACM SIGART Bull. 2(4), 160\u2013163 (1991)","journal-title":"ACM SIGART Bull."},{"key":"4_CR51","doi-asserted-by":"crossref","unstructured":"Szegedy, C., et al.: Going deeper with convolutions. In: CVPR, pp.\u00a01\u20139 (2015)","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"4_CR52","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"685","DOI":"10.1007\/978-3-030-58604-1_41","volume-title":"Computer Vision \u2013 ECCV 2020","author":"H Tang","year":"2020","unstructured":"Tang, H., et al.: Searching efficient 3D architectures with sparse point-voxel convolution. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12373, pp. 685\u2013702. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58604-1_41"},{"key":"4_CR53","unstructured":"Tian, X., Jiang, T., Yun, L., Wang, Y., Wang, Y., Zhao, H.: Occ3D: a large-scale 3D occupancy prediction benchmark for autonomous driving. arXiv preprint arXiv:2304.14365 (2023)"},{"key":"4_CR54","doi-asserted-by":"crossref","unstructured":"Tong, W., et al.: Scene as occupancy. In: ICCV, pp. 8406\u20138415 (2023)","DOI":"10.1109\/ICCV51070.2023.00772"},{"key":"4_CR55","doi-asserted-by":"crossref","unstructured":"Vitelli, M., et al.: SafetyNet: safe planning for real-world self-driving vehicles using machine-learned policies. In: 2022 International Conference on Robotics and Automation (ICRA), pp. 897\u2013904 (2022)","DOI":"10.1109\/ICRA46639.2022.9811576"},{"key":"4_CR56","doi-asserted-by":"crossref","unstructured":"Wang, X., Zhu, Z., Huang, G., Chen, X., Lu, J.: DriveDreamer: towards real-world-driven world models for autonomous driving. arXiv preprint arXiv:2309.09777 (2023)","DOI":"10.1007\/978-3-031-73195-2_4"},{"key":"4_CR57","doi-asserted-by":"crossref","unstructured":"Wang, X., et al.: OpenOccupancy: a large scale benchmark for surrounding semantic occupancy perception. arXiv preprint arXiv:2303.03991 (2023)","DOI":"10.1109\/ICCV51070.2023.01636"},{"key":"4_CR58","doi-asserted-by":"crossref","unstructured":"Wei, Y., Zhao, L., Zheng, W., Zhu, Z., Zhou, J., Lu, J.: SurroundOcc: multi-camera 3D occupancy prediction for autonomous driving. In: ICCV, pp. 21729\u201321740 (2023)","DOI":"10.1109\/ICCV51070.2023.01986"},{"key":"4_CR59","doi-asserted-by":"crossref","unstructured":"Weng, X., Wang, J., Levine, S., Kitani, K., Rhinehart, N.: Inverting the pose forecasting pipeline with SPF2: sequential pointcloud forecasting for sequential pose forecasting. In: CoRL, pp. 11\u201320 (2021)","DOI":"10.1007\/978-3-031-19812-0_32"},{"key":"4_CR60","doi-asserted-by":"crossref","unstructured":"Yan, X., et al.: Sparse single sweep LiDAR point cloud segmentation via learning contextual shape priors from scene completion. In: AAAI, vol.\u00a035, pp. 3101\u20133109 (2021)","DOI":"10.1609\/aaai.v35i4.16419"},{"key":"4_CR61","unstructured":"Yang, K., Ma, E., Peng, J., Guo, Q., Lin, D., Yu, K.: BEVControl: accurately controlling street-view elements with multi-perspective consistency via BEV sketch layout. arXiv preprint arXiv:2308.01661 (2023)"},{"key":"4_CR62","unstructured":"Ye, D., et al.: LidarMultiNet: towards a unified multi-task network for LiDAR perception. arXiv preprint arXiv:2209.09385 (2022)"},{"key":"4_CR63","unstructured":"Ye, M., Wan, R., Xu, S., Cao, T., Chen, Q.: DRINet++: efficient voxel-as-point point cloud segmentation. arXiv preprint arXiv:2111.08318 (2021)"},{"key":"4_CR64","unstructured":"Ye, T., et al.: FusionAD: multi-modality fusion for prediction and planning tasks of autonomous driving. arXiv preprint arXiv:2308.01006 (2023)"},{"key":"4_CR65","first-page":"9644","volume":"26","author":"S Zeng","year":"2024","unstructured":"Zeng, S., Zheng, W., Lu, J., Yan, H.: Hardness-aware scene synthesis for semi-supervised 3D object detection. TMM 26, 9644\u20139656 (2024)","journal-title":"TMM"},{"key":"4_CR66","doi-asserted-by":"crossref","unstructured":"Zeng, W., et al.: End-to-end interpretable neural motion planner. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00886"},{"key":"4_CR67","unstructured":"Zhang, Y., et al.: BEVerse: unified perception and prediction in birds-eye-view for vision-centric autonomous driving. arXiv preprint arXiv:2205.09743 (2022)"},{"key":"4_CR68","doi-asserted-by":"crossref","unstructured":"Zhao, L., et al.: LowRankOcc: tensor decomposition and low-rank recovery for vision-based 3D semantic occupancy prediction. In: CVPR, pp. 9806\u20139815 (2024)","DOI":"10.1109\/CVPR52733.2024.00936"},{"key":"4_CR69","doi-asserted-by":"crossref","unstructured":"Zhou, J., et al.: Exploring imitation learning for autonomous driving with feedback synthesizer and differentiable rasterization. In: IROS, pp. 1450\u20131457 (2021)","DOI":"10.1109\/IROS51168.2021.9636795"},{"key":"4_CR70","doi-asserted-by":"crossref","unstructured":"Zhu, X., et al.: Cylindrical and asymmetrical 3D convolution networks for LiDAR segmentation. In: CVPR, pp. 9939\u20139948 (2021)","DOI":"10.1109\/CVPR46437.2021.00981"},{"key":"4_CR71","unstructured":"Zuo, S., Zheng, W., Huang, Y., Zhou, J., Lu, J.: PointOcc: cylindrical tri-perspective view for point-based 3D semantic occupancy prediction. arXiv preprint arXiv:2308.16896 (2023)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72624-8_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,30]],"date-time":"2024-11-30T07:45:45Z","timestamp":1732952745000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72624-8_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,26]]},"ISBN":["9783031726231","9783031726248"],"references-count":71,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72624-8_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10,26]]},"assertion":[{"value":"26 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}