{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,4]],"date-time":"2025-09-04T13:45:09Z","timestamp":1756993509042,"version":"3.40.3"},"publisher-location":"Cham","reference-count":67,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031726231"},{"type":"electronic","value":"9783031726248"}],"license":[{"start":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T00:00:00Z","timestamp":1729900800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T00:00:00Z","timestamp":1729900800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72624-8_3","type":"book-chapter","created":{"date-parts":[[2024,10,25]],"date-time":"2024-10-25T09:52:13Z","timestamp":1729849933000},"page":"37-54","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["SpatialFormer: Towards Generalizable Vision Transformers with\u00a0Explicit Spatial Understanding"],"prefix":"10.1007","author":[{"given":"Han","family":"Xiao","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wenzhao","family":"Zheng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sicheng","family":"Zuo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Peng","family":"Gao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jie","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiwen","family":"Lu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,10,26]]},"reference":[{"key":"3_CR1","unstructured":"Baevski, A., Zhou, Y., Mohamed, A., Auli, M.: wav2vec 2.0: a framework for self-supervised learning of speech representations. NeurIPS 33, 12449\u201312460 (2020)"},{"key":"3_CR2","doi-asserted-by":"crossref","unstructured":"Caesar, H., et al.: nuScenes: a multimodal dataset for autonomous driving. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"3_CR3","doi-asserted-by":"crossref","unstructured":"Cai, Z., Vasconcelos, N.: Cascade R-CNN: delving into high quality object detection. In: CVPR, pp. 6154\u20136162 (2018)","DOI":"10.1109\/CVPR.2018.00644"},{"key":"3_CR4","doi-asserted-by":"crossref","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: ECCV, pp. 213\u2013229 (2020)","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"3_CR5","unstructured":"Chen, C.F., Panda, R., Fan, Q.: RegionViT: regional-to-local attention for vision transformers. arXiv preprint arXiv:2106.02689 (2021)"},{"key":"3_CR6","doi-asserted-by":"crossref","unstructured":"Cheng, B., Misra, I., Schwing, A.G., Kirillov, A., Girdhar, R.: Masked-attention mask transformer for universal image segmentation. In: CVPR, pp. 1290\u20131299 (2022)","DOI":"10.1109\/CVPR52688.2022.00135"},{"key":"3_CR7","unstructured":"Cheng, B., Schwing, A.G., Kirillov, A.: Per-pixel classification is not all you need for semantic segmentation. In: NeurIPS (2021)"},{"key":"3_CR8","unstructured":"Chu, X., et al.: Twins: revisiting the design of spatial attention in vision transformers. In: NeurIPS (2021)"},{"key":"3_CR9","unstructured":"Chu, X., Tian, Z., Zhang, B., Wang, X., Shen, C.: Conditional positional encodings for vision transformers. In: ICLR (2022)"},{"key":"3_CR10","doi-asserted-by":"crossref","unstructured":"Dai, X., Chen, Y., Yang, J., Zhang, P., Yuan, L., Zhang, L.: Dynamic DETR: end-to-end object detection with dynamic attention. In: ICCV, pp. 2988\u20132997 (2021)","DOI":"10.1109\/ICCV48922.2021.00298"},{"key":"3_CR11","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: ImageNet: a large-scale hierarchical image database. In: CVPR, pp. 248\u2013255. IEEE (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"3_CR12","doi-asserted-by":"crossref","unstructured":"Dong, X., et al.: CSWin transformer: a general vision transformer backbone with cross-shaped windows. arXiv preprint arXiv:2107.00652 (2021)","DOI":"10.1109\/CVPR52688.2022.01181"},{"key":"3_CR13","doi-asserted-by":"crossref","unstructured":"Dong, X., et al.: CSWin transformer: a general vision transformer backbone with cross-shaped windows. In: CVPR, pp. 12124\u201312134 (2022)","DOI":"10.1109\/CVPR52688.2022.01181"},{"key":"3_CR14","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth 16x16 words: transformers for image recognition at scale. In: ICLR (2020)"},{"key":"3_CR15","doi-asserted-by":"crossref","unstructured":"Graham, B., et al.: LeViT: a vision transformer in ConvNet\u2019s clothing for faster inference. In: ICCV, pp. 12259\u201312269 (2021)","DOI":"10.1109\/ICCV48922.2021.01204"},{"key":"3_CR16","doi-asserted-by":"crossref","unstructured":"Grainger, R., Paniagua, T., Song, X., Cuntoor, N., Lee, M.W., Wu, T.: PaCa-ViT: learning patch-to-cluster attention in vision transformers. In: CVPR, pp. 18568\u201318578 (2023)","DOI":"10.1109\/CVPR52729.2023.01781"},{"key":"3_CR17","doi-asserted-by":"crossref","unstructured":"Guo, J., et al.: CMT: convolutional neural networks meet vision transformers. In: CVPR, pp. 12175\u201312185 (2022)","DOI":"10.1109\/CVPR52688.2022.01186"},{"key":"3_CR18","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: CVPR, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"3_CR19","doi-asserted-by":"crossref","unstructured":"Huang, Y., Zheng, W., Zhang, B., Zhou, J., Lu, J.: SelfOcc: self-supervised vision-based 3D occupancy prediction. In: CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.01885"},{"key":"3_CR20","doi-asserted-by":"crossref","unstructured":"Huang, Y., Zheng, W., Zhang, Y., Zhou, J., Lu, J.: Tri-perspective view for vision-based 3D semantic occupancy prediction. arXiv preprint arXiv:2302.07817 (2023)","DOI":"10.1109\/CVPR52729.2023.00890"},{"key":"3_CR21","doi-asserted-by":"crossref","unstructured":"Huang, Y., Zheng, W., Zhang, Y., Zhou, J., Lu, J.: Gaussianformer: scene as Gaussians for vision-based 3D semantic occupancy prediction. In: ECCV (2024)","DOI":"10.1109\/CVPR52729.2023.00890"},{"key":"3_CR22","unstructured":"Li, K., et al.: Uniformer: unifying convolution and self-attention for visual recognition. arXiv preprint arXiv:2201.09450 (2022)"},{"key":"3_CR23","doi-asserted-by":"crossref","unstructured":"Li, Y., et al.: BEVDepth: acquisition of reliable depth for multi-view 3D object detection. arXiv preprint arXiv:2206.10092 (2022)","DOI":"10.1609\/aaai.v37i2.25233"},{"key":"3_CR24","doi-asserted-by":"crossref","unstructured":"Li, Z., et al.: BEVFormer: learning bird\u2019s-eye-view representation from multi-camera images via spatiotemporal transformers. In: ECCV (2022)","DOI":"10.1007\/978-3-031-20077-9_1"},{"key":"3_CR25","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part V","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) Computer Vision \u2013 ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part V, pp. 740\u2013755. Springer International Publishing, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"3_CR26","unstructured":"Liu, S., et al.: DAB-DETR: dynamic anchor boxes are better queries for DETR. arXiv preprint arXiv:2201.12329 (2022)"},{"key":"3_CR27","first-page":"23818","volume":"34","author":"Y Liu","year":"2021","unstructured":"Liu, Y., Sangineto, E., Bi, W., Sebe, N., Lepri, B., Nadai, M.: Efficient training of visual transformers with small datasets. NeurIPS 34, 23818\u201323830 (2021)","journal-title":"NeurIPS"},{"key":"3_CR28","doi-asserted-by":"crossref","unstructured":"Liu, Y., Wang, T., Zhang, X., Sun, J.: PETR: position embedding transformation for multi-view 3D object detection. arXiv preprint arXiv:2203.05625 (2022)","DOI":"10.1007\/978-3-031-19812-0_31"},{"key":"3_CR29","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer: hierarchical vision transformer using shifted windows. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"3_CR30","doi-asserted-by":"crossref","unstructured":"Liu, Z., Mao, H., Wu, C.Y., Feichtenhofer, C., Darrell, T., Xie, S.: A convNet for the 2020s. arXiv preprint arXiv:2201.03545 (2022)","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"3_CR31","unstructured":"Lu, J., et al.: SOFT: softmax-free transformer with linear complexity. In: NeurIPS (2021)"},{"key":"3_CR32","doi-asserted-by":"crossref","unstructured":"Ren, S., Zhou, D., He, S., Feng, J., Wang, X.: Shunted self-attention via multi-scale token aggregation. In: CVPR, pp. 10853\u201310862 (2022)","DOI":"10.1109\/CVPR52688.2022.01058"},{"issue":"3","key":"3_CR33","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., et al.: Imagenet large scale visual recognition challenge. IJCV 115(3), 211\u2013252 (2015). https:\/\/doi.org\/10.1007\/s11263-015-0816-y","journal-title":"IJCV"},{"key":"3_CR34","doi-asserted-by":"crossref","unstructured":"Strudel, R., Garcia, R., Laptev, I., Schmid, C.: Segmenter: transformer for semantic segmentation. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.00717"},{"key":"3_CR35","doi-asserted-by":"publisher","first-page":"127063","DOI":"10.1016\/j.neucom.2023.127063","volume":"568","author":"J Su","year":"2024","unstructured":"Su, J., Ahmed, M., Lu, Y., Pan, S., Bo, W., Liu, Y.: RoFormer: enhanced transformer with rotary position embedding. Neurocomputing 568, 127063 (2024)","journal-title":"Neurocomputing"},{"key":"3_CR36","first-page":"7537","volume":"33","author":"M Tancik","year":"2020","unstructured":"Tancik, M., et al.: Fourier features let networks learn high frequency functions in low dimensional domains. NeurIPS 33, 7537\u20137547 (2020)","journal-title":"NeurIPS"},{"key":"3_CR37","doi-asserted-by":"crossref","unstructured":"Tong, W., et\u00a0al.: Scene as occupancy. In: ICCV, pp. 8406\u20138415 (2023)","DOI":"10.1109\/ICCV51070.2023.00772"},{"key":"3_CR38","unstructured":"Touvron, H., Cord, M., Douze, M., Massa, F., Sablayrolles, A., J\u00e9gou, H.: Training data-efficient image transformers and distillation through attention. In: ICML, pp. 10347\u201310357 (2021)"},{"key":"3_CR39","doi-asserted-by":"crossref","unstructured":"Touvron, H., Cord, M., Sablayrolles, A., Synnaeve, G., J\u00e9gou, H.: Going deeper with image transformers. In: ICCV, pp. 32\u201342 (2021)","DOI":"10.1109\/ICCV48922.2021.00010"},{"key":"3_CR40","unstructured":"Vaswani, A., et al.: Attention is all you need. In: NeurIPS, pp. 5998\u20136008 (2017)"},{"key":"3_CR41","doi-asserted-by":"crossref","unstructured":"Wang, C., Zheng, W., Zhu, Z., Zhou, J., Lu, J.: OPERA: omni-supervised representation learning with hierarchical supervisions. In: ICCV, pp. 5559\u20135570 (2023)","DOI":"10.1109\/ICCV51070.2023.00512"},{"key":"3_CR42","doi-asserted-by":"crossref","unstructured":"Wang, W., et al.: Pyramid vision transformer: a versatile backbone for dense prediction without convolutions. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.00061"},{"issue":"3","key":"3_CR43","doi-asserted-by":"publisher","first-page":"415","DOI":"10.1007\/s41095-022-0274-8","volume":"8","author":"W Wang","year":"2022","unstructured":"Wang, W., et al.: PVT v2: improved baselines with pyramid vision transformer. Comput. Vis. Media 8(3), 415\u2013424 (2022). https:\/\/doi.org\/10.1007\/s41095-022-0274-8","journal-title":"Comput. Vis. Media"},{"key":"3_CR44","unstructured":"Wang, W., et al.: CrossFormer: a versatile vision transformer hinging on cross-scale attention. In: ICLR (2023)"},{"key":"3_CR45","unstructured":"Wang, Y., Guizilini, V., Zhang, T., Wang, Y., Zhao, H., Solomon, J.M.: DETR3D: 3D object detection from multi-view images via 3D-to-2D queries. In: CoRL (2021)"},{"key":"3_CR46","doi-asserted-by":"crossref","unstructured":"Wang, Y., et al.: End-to-end video instance segmentation with transformers. In: CVPR, pp. 8741\u20138750 (2021)","DOI":"10.1109\/CVPR46437.2021.00863"},{"key":"3_CR47","doi-asserted-by":"crossref","unstructured":"Wei, Y., Zhao, L., Zheng, W., Zhu, Z., Zhou, J., Lu, J.: Surroundocc: multi-camera 3D occupancy prediction for autonomous driving. In: ICCV, pp. 21729\u201321740 (2023)","DOI":"10.1109\/ICCV51070.2023.01986"},{"key":"3_CR48","doi-asserted-by":"crossref","unstructured":"Wu, H., et al.: CvT: introducing convolutions to vision transformers. In: CVPR, pp. 22\u201331 (2021)","DOI":"10.1109\/ICCV48922.2021.00009"},{"key":"3_CR49","doi-asserted-by":"crossref","unstructured":"Xia, Z., Pan, X., Song, S., Li, L.E., Huang, G.: Vision transformer with deformable attention. In: CVPR, pp. 4794\u20134803 (2022)","DOI":"10.1109\/CVPR52688.2022.00475"},{"key":"3_CR50","doi-asserted-by":"crossref","unstructured":"Xiao, H., Zheng, W., Zhu, Z., Zhou, J., Lu, J.: Token-label alignment for vision transformers. In: ICCV, pp. 5495\u20135504 (2023)","DOI":"10.1109\/ICCV51070.2023.00506"},{"key":"3_CR51","doi-asserted-by":"crossref","unstructured":"Xiao, T., Liu, Y., Zhou, B., Jiang, Y., Sun, J.: Unified perceptual parsing for scene understanding. In: ECCV, pp. 418\u2013434 (2018)","DOI":"10.1007\/978-3-030-01228-1_26"},{"key":"3_CR52","unstructured":"Yang, J., et al.: Focal self-attention for local-global interactions in vision transformers. arXiv preprint arXiv:2107.00641 (2021)"},{"key":"3_CR53","doi-asserted-by":"publisher","unstructured":"Yu, Q. et al.: K-means mask transformer. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds) Computer Vision \u2013 ECCV 2022. ECCV 2022. LNCS, vol 13689, pp. 288\u2013307 (2022) Springer, Cham. https:\/\/doi.org\/10.1007\/978-3-031-19818-2_17","DOI":"10.1007\/978-3-031-19818-2_17"},{"key":"3_CR54","doi-asserted-by":"crossref","unstructured":"Zeng, S., Zheng, W., Lu, J., Yan, H.: Hardness-aware scene synthesis for semi-supervised 3D object detection. TMM (2024)","DOI":"10.1109\/TMM.2024.3396297"},{"key":"3_CR55","doi-asserted-by":"crossref","unstructured":"Zhang, Q., Zhang, J., Xu, Y., Tao, D.: Vision transformer with quadrangle attention. TPAMI (2024)","DOI":"10.1109\/TPAMI.2023.3347693"},{"key":"3_CR56","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Zheng, W., Zhu, Z., Huang, G., Zhou, J., Lu, J.: A simple baseline for multi-camera 3D object detection. arXiv preprint arXiv:2208.10035 (2022)","DOI":"10.1609\/aaai.v37i3.25460"},{"key":"3_CR57","unstructured":"Zhang, Y., et al.: BEVerse: unified perception and prediction in birds-eye-view for vision-centric autonomous driving. arXiv preprint arXiv:2205.09743 (2022)"},{"key":"3_CR58","doi-asserted-by":"crossref","unstructured":"Zhao, L., et al.: LowRankOcc: tensor decomposition and low-rank recovery for vision-based 3D semantic occupancy prediction. In: CVPR. pp, 9806\u20139815 (2024)","DOI":"10.1109\/CVPR52733.2024.00936"},{"key":"3_CR59","doi-asserted-by":"crossref","unstructured":"Zheng, S., et\u00a0al.: Rethinking semantic segmentation from a sequence-to-sequence perspective with transformers. In: CVPR, pp. 6881\u20136890 (2021)","DOI":"10.1109\/CVPR46437.2021.00681"},{"key":"3_CR60","doi-asserted-by":"crossref","unstructured":"Zheng, W., Chen, W., Huang, Y., Zhang, B., Duan, Y., Lu, J.: OccWorld: learning a 3D occupancy world model for autonomous driving. In: ECCV (2024)","DOI":"10.1007\/978-3-031-72624-8_4"},{"key":"3_CR61","doi-asserted-by":"crossref","unstructured":"Zheng, W., Lu, J., Jie, Z.: Structural deep metric learning for room layout estimation. In: ECCV (2020)","DOI":"10.1007\/978-3-030-58523-5_43"},{"key":"3_CR62","doi-asserted-by":"crossref","unstructured":"Zheng, W., Song, R., Guo, X., Chen, L.: GenAD: Generative end-to-end autonomous driving. In: ECCV (2024)","DOI":"10.1007\/978-3-031-73650-6_6"},{"key":"3_CR63","doi-asserted-by":"crossref","unstructured":"Zhou, B., Zhao, H., Puig, X., Fidler, S., Barriuso, A., Torralba, A.: Scene parsing through ade20k dataset. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 633\u2013641 (2017)","DOI":"10.1109\/CVPR.2017.544"},{"key":"3_CR64","doi-asserted-by":"publisher","first-page":"302","DOI":"10.1007\/s11263-018-1140-0","volume":"127","author":"B Zhou","year":"2019","unstructured":"Zhou, B., et al.: Semantic understanding of scenes through the ADE20K dataset. IJCV 127, 302\u2013321 (2019). https:\/\/doi.org\/10.1007\/s11263-018-1140-0","journal-title":"IJCV"},{"key":"3_CR65","unstructured":"Zhu, X., Su, W., Lu, L., Li, B., Wang, X., Dai, J.: Deformable DETR: deformable transformers for end-to-end object detection. In: ICLR (2020)"},{"key":"3_CR66","unstructured":"Zhu, X., Su, W., Lu, L., Li, B., Wang, X., Dai, J.: Deformable DETR: deformable transformers for end-to-end object detection. In: ICLR (2021)"},{"key":"3_CR67","unstructured":"Zuo, S., Zheng, W., Huang, Y., Zhou, J., Lu, J.: PointOcc: cylindrical tri-perspective view for point-based 3D semantic occupancy prediction. arXiv preprint arXiv:2308.16896 (2023)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72624-8_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,30]],"date-time":"2024-11-30T07:45:27Z","timestamp":1732952727000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72624-8_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,26]]},"ISBN":["9783031726231","9783031726248"],"references-count":67,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72624-8_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,10,26]]},"assertion":[{"value":"26 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}