{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,8]],"date-time":"2026-03-08T04:42:02Z","timestamp":1772944922979,"version":"3.50.1"},"publisher-location":"Cham","reference-count":84,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031726699","type":"print"},{"value":"9783031726705","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T00:00:00Z","timestamp":1727654400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T00:00:00Z","timestamp":1727654400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72670-5_20","type":"book-chapter","created":{"date-parts":[[2024,9,29]],"date-time":"2024-09-29T07:01:50Z","timestamp":1727593310000},"page":"349-367","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":14,"title":["SegPoint: Segment Any Point Cloud via\u00a0Large Language Model"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1582-5684","authenticated-orcid":false,"given":"Shuting","family":"He","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4868-6526","authenticated-orcid":false,"given":"Henghui","family":"Ding","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9104-2315","authenticated-orcid":false,"given":"Xudong","family":"Jiang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6874-6453","authenticated-orcid":false,"given":"Bihan","family":"Wen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,9,30]]},"reference":[{"key":"20_CR1","doi-asserted-by":"crossref","unstructured":"Achlioptas, P., Abdelreheem, A., Xia, F., Elhoseiny, M., Guibas, L.: Referit3D: neural listeners for fine-grained 3D object identification in real-world scenes. In: ECCV (2020)","DOI":"10.1007\/978-3-030-58452-8_25"},{"key":"20_CR2","unstructured":"Alayrac, J.B., et\u00a0al.: Flamingo: a visual language model for few-shot learning. In: NeurIPS (2022)"},{"key":"20_CR3","doi-asserted-by":"crossref","unstructured":"Armeni, I., et al.: 3D semantic parsing of large-scale indoor spaces. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.170"},{"key":"20_CR4","doi-asserted-by":"crossref","unstructured":"Chen, D.Z., Chang, A.X., Nie\u00dfner, M.: ScanRefer: 3D object localization in RGB-D scans using natural language. In: ECCV (2020)","DOI":"10.1007\/978-3-030-58565-5_13"},{"key":"20_CR5","unstructured":"Chen, Z., et al.: Vision transformer adapter for dense predictions. In: ICLR (2023)"},{"key":"20_CR6","doi-asserted-by":"crossref","unstructured":"Choy, C., Gwak, J., Savarese, S.: 4D spatio-temporal convnets: Minkowski convolutional neural networks. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00319"},{"key":"20_CR7","doi-asserted-by":"crossref","unstructured":"Dai, A., Chang, A.X., Savva, M., Halber, M., Funkhouser, T., Nie\u00dfner, M.: ScanNet: richly-annotated 3d reconstructions of indoor scenes. 
In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.261"},{"key":"20_CR8","doi-asserted-by":"crossref","unstructured":"Ding, H., Liu, C., He, S., Jiang, X., Loy, C.C.: MeViS: a large-scale benchmark for video segmentation with motion expressions. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.00254"},{"key":"20_CR9","doi-asserted-by":"crossref","unstructured":"Ding, H., Liu, C., He, S., Jiang, X., Torr, P.H., Bai, S.: MOSE: a new dataset for video object segmentation in complex scenes. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.01850"},{"key":"20_CR10","doi-asserted-by":"crossref","unstructured":"Ding, H., Liu, C., Wang, S., Jiang, X.: Vision-language transformer and query generation for referring segmentation. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01601"},{"key":"20_CR11","doi-asserted-by":"crossref","unstructured":"Ding, H., Liu, C., Wang, S., Jiang, X.: VLT: Vision-language transformer and query generation for referring segmentation. IEEE TPAMI (2023)","DOI":"10.1109\/TPAMI.2022.3217852"},{"key":"20_CR12","doi-asserted-by":"crossref","unstructured":"Ding, R., Yang, J., Xue, C., Zhang, W., Bai, S., Qi, X.: PLA: language-driven open-vocabulary 3d scene understanding. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.00677"},{"key":"20_CR13","unstructured":"Ding, Z., Wang, J., Tu, Z.: Open-vocabulary panoptic segmentation with MaskCLIP. In: ICLR (2023)"},{"key":"20_CR14","doi-asserted-by":"crossref","unstructured":"Girdhar, R., et al.: ImageBind: one embedding space to bind them all. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.01457"},{"key":"20_CR15","unstructured":"Guo, Z., et\u00a0al.: Point-bind & point-LLM: aligning point cloud with multi-modality for 3D understanding, generation, and instruction following. arXiv preprint arXiv:2309.00615 (2023)"},{"key":"20_CR16","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"20_CR17","doi-asserted-by":"crossref","unstructured":"He, S., Ding, H.: Decoupling static and hierarchical motion perception for referring video segmentation. In: CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.01266"},{"key":"20_CR18","doi-asserted-by":"crossref","unstructured":"He, S., Ding, H.: RefMask3D: Language-guided transformer for 3D referring segmentation. In: ACM MM (2024)","DOI":"10.1145\/3664647.3680998"},{"key":"20_CR19","doi-asserted-by":"crossref","unstructured":"He, S., Jiang, X., Jiang, W., Ding, H.: Prototype adaption and projection for few-and zero-shot 3D point cloud semantic segmentation. IEEE TIP (2023)","DOI":"10.1109\/TIP.2023.3279660"},{"key":"20_CR20","unstructured":"Hong, Y., et al.: 3D-LLM: injecting the 3d world into large language models. In: NeurIPS (2023)"},{"key":"20_CR21","unstructured":"Hu, E.J., et al.: LORA: low-rank adaptation of large language models. In: ICLR (2022)"},{"key":"20_CR22","doi-asserted-by":"crossref","unstructured":"Huang, P.H., Lee, H.H., Chen, H.T., Liu, T.L.: Text-guided graph neural networks for referring 3D instance segmentation. In: AAAI (2021)","DOI":"10.1609\/aaai.v35i2.16253"},{"key":"20_CR23","doi-asserted-by":"crossref","unstructured":"Jain, A., Gkanatsios, N., Mediratta, I., Fragkiadaki, K.: Bottom up top down detection transformers for language grounding in images and point clouds. 
In: ECCV (2022)","DOI":"10.1007\/978-3-031-20059-5_24"},{"key":"20_CR24","doi-asserted-by":"crossref","unstructured":"Jia, B., et al.: SceneVerse: scaling 3D vision-language learning for grounded scene understanding. In: ECCV (2024)","DOI":"10.1007\/978-3-031-72673-6_16"},{"key":"20_CR25","doi-asserted-by":"crossref","unstructured":"Jiang, L., Zhao, H., Shi, S., Liu, S., Fu, C.W., Jia, J.: PointGroup: dual-set point grouping for 3d instance segmentation. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00492"},{"key":"20_CR26","doi-asserted-by":"crossref","unstructured":"Kirillov, A., et\u00a0al.: Segment anything. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"20_CR27","doi-asserted-by":"crossref","unstructured":"Kolodiazhnyi, M., Vorontsova, A., Konushin, A., Rukhovich, D.: Oneformer3D: one transformer for unified point cloud segmentation. In: CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.01979"},{"key":"20_CR28","doi-asserted-by":"crossref","unstructured":"Lai, X., et al.: LISA: reasoning segmentation via large language model. In: CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.00915"},{"key":"20_CR29","doi-asserted-by":"crossref","unstructured":"Lai, X., Yuan, Y., Chu, R., Chen, Y., Hu, H., Jia, J.: Mask-attention-free transformer for 3D instance segmentation. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.00342"},{"key":"20_CR30","unstructured":"Li, B., Zhang, Y., Chen, L., Wang, J., Yang, J., Liu, Z.: Otter: a multi-modal model with in-context instruction tuning. arXiv:2305.03726 (2023)"},{"key":"20_CR31","unstructured":"Li, J., Li, D., Savarese, S., Hoi, S.: BLIP-2: bootstrapping language-image pre-training with frozen image encoders and large language models. In: NeurIPS (2023)"},{"key":"20_CR32","doi-asserted-by":"crossref","unstructured":"Lin, Z., et\u00a0al.: Sphinx: The joint mixing of weights, tasks, and visual embeddings for multi-modal large language models. arXiv preprint arXiv:2311.07575 (2023)","DOI":"10.1007\/978-3-031-73033-7_3"},{"key":"20_CR33","doi-asserted-by":"crossref","unstructured":"Liu, C., Ding, H., Jiang, X.: GRES: Generalized referring expression segmentation. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.02259"},{"key":"20_CR34","doi-asserted-by":"crossref","unstructured":"Liu, C., Ding, H., Zhang, Y., Jiang, X.: Multi-modal mutual attention and iterative interaction for referring image segmentation. IEEE TIP (2023)","DOI":"10.1109\/TIP.2023.3277791"},{"key":"20_CR35","doi-asserted-by":"crossref","unstructured":"Liu, C., Jiang, X., Ding, H.: Instance-specific feature propagation for referring segmentation. IEEE TMM (2022)","DOI":"10.1109\/TMM.2022.3163578"},{"issue":"1","key":"20_CR36","doi-asserted-by":"publisher","first-page":"16","DOI":"10.1007\/s44267-024-00049-8","volume":"2","author":"C Liu","year":"2024","unstructured":"Liu, C., Jiang, X., Ding, H.: PrimitiveNet: decomposing the global constraints for referring segmentation. Visual Intell. 2(1), 16 (2024)","journal-title":"Visual Intell."},{"key":"20_CR37","unstructured":"Liu, H., Li, C., Wu, Q., Lee, Y.J.: Visual instruction tuning. In: NeurIPS (2023)"},{"key":"20_CR38","unstructured":"Liu, Y., et al.: Segment any point cloud sequences by distilling vision foundation models. In: NeurIPS (2023)"},{"key":"20_CR39","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: ICLR (2019)"},{"key":"20_CR40","doi-asserted-by":"crossref","unstructured":"Nguyen, P.D., et al.: Open3Dis: open-vocabulary 3D instance segmentation with 2D mask guidance. 
In: CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.00385"},{"key":"20_CR41","unstructured":"Park, N., Kim, S.: How do vision transformers work? arXiv preprint arXiv:2202.06709 (2022)"},{"key":"20_CR42","doi-asserted-by":"crossref","unstructured":"Peng, S., Genova, K., Jiang, C., Tagliasacchi, A., Pollefeys, M., Funkhouser, T., et\u00a0al.: Openscene: 3D scene understanding with open vocabularies. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.00085"},{"key":"20_CR43","unstructured":"Peng, Z., et al.: KOSMOS-2: grounding multimodal large language models to the world. In: ICLR (2024)"},{"key":"20_CR44","doi-asserted-by":"crossref","unstructured":"Pi, R., et al.: DetGPT: detect what you need via reasoning. In: EMNLP (2023)","DOI":"10.18653\/v1\/2023.emnlp-main.876"},{"key":"20_CR45","unstructured":"Qi, C.R., Su, H., Mo, K., Guibas, L.J.: PointNet: deep learning on point sets for 3D classification and segmentation. In: CVPR (2017)"},{"key":"20_CR46","unstructured":"Qi, C.R., Yi, L., Su, H., Guibas, L.J.: PointNet++: deep hierarchical feature learning on point sets in a metric space. In: NeurIPS (2017)"},{"key":"20_CR47","doi-asserted-by":"crossref","unstructured":"Qi, Z., et al.: GPT4Point: a unified framework for point-language understanding and generation. In: CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.02495"},{"key":"20_CR48","unstructured":"Qian, G., et al.: PointNext: revisiting PointNet++ with improved training and scaling strategies. In: NeurIPS (2022)"},{"key":"20_CR49","doi-asserted-by":"crossref","unstructured":"Qian, Z., Ma, Y., Ji, J., Sun, X.: X-refseg3D: enhancing referring 3D instance segmentation via structured cross-modal graph neural networks. In: AAAI (2024)","DOI":"10.1609\/aaai.v38i5.28254"},{"key":"20_CR50","doi-asserted-by":"crossref","unstructured":"Rasheed, H., et al.: GLAMM: pixel grounding large multimodal model. In: CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.01236"},{"key":"20_CR51","doi-asserted-by":"crossref","unstructured":"Rasley, J., Rajbhandari, S., Ruwase, O., He, Y.: DeepSpeed: system optimizations enable training deep learning models with over 100 billion parameters. In: KDD (2020)","DOI":"10.1145\/3394486.3406703"},{"key":"20_CR52","doi-asserted-by":"crossref","unstructured":"Ren, Z., et al.: PixeLLM: pixel reasoning with large multimodal model. In: CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.02491"},{"key":"20_CR53","doi-asserted-by":"crossref","unstructured":"Rozenberszki, D., Litany, O., Dai, A.: Language-grounded indoor 3D semantic segmentation in the wild. In: ECCV (2022)","DOI":"10.1007\/978-3-031-19827-4_8"},{"key":"20_CR54","doi-asserted-by":"crossref","unstructured":"Schult, J., Engelmann, F., Hermans, A., Litany, O., Tang, S., Leibe, B.: Mask3D: mask transformer for 3D semantic instance segmentation. In: ICRA (2023)","DOI":"10.1109\/ICRA48891.2023.10160590"},{"key":"20_CR55","unstructured":"Shuai, X., Ding, H., Ma, X., Tu, R., Jiang, Y.G., Tao, D.: A survey of multimodal-guided image editing with text-to-image diffusion models. arXiv preprint arXiv:2406.14555 (2024)"},{"key":"20_CR56","doi-asserted-by":"crossref","unstructured":"Sun, J., Qing, C., Tan, J., Xu, X.: Superpoint transformer for 3D scene instance segmentation. In: AAAI (2023)","DOI":"10.1609\/aaai.v37i2.25335"},{"key":"20_CR57","unstructured":"Takmaz, A., Fedele, E., Sumner, R.W., Pollefeys, M., Tombari, F., Engelmann, F.: Openmask3D: open-vocabulary 3D instance segmentation. 
In: NeurIPS (2023)"},{"key":"20_CR58","doi-asserted-by":"crossref","unstructured":"Thomas, H., Qi, C.R., Deschaud, J.E., Marcotegui, B., Goulette, F., Guibas, L.J.: KPCONV: flexible and deformable convolution for point clouds. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00651"},{"key":"20_CR59","unstructured":"Touvron, H., et\u00a0al.: Llama: open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)"},{"key":"20_CR60","doi-asserted-by":"crossref","unstructured":"Wang, P.S.: OctFormer: octree-based transformers for 3D point clouds. In: SIGGRAPH (2023)","DOI":"10.1145\/3592131"},{"key":"20_CR61","unstructured":"Wang, W., et\u00a0al.: VisionLLM: large language model is also an open-ended decoder for vision-centric tasks. In: NeurIPS (2023)"},{"key":"20_CR62","doi-asserted-by":"crossref","unstructured":"Wang, W., et al.: PVT V2: improved baselines with pyramid vision transformer. Computational Visual Media (2022)","DOI":"10.1007\/s41095-022-0274-8"},{"key":"20_CR63","doi-asserted-by":"crossref","unstructured":"Wu, C., et al.: 3D-STMN: dependency-driven superpoint-text matching network for end-to-end 3D referring expression segmentation. In: AAAI (2024)","DOI":"10.1609\/aaai.v38i6.28408"},{"key":"20_CR64","doi-asserted-by":"crossref","unstructured":"Wu, H., et al.: CVT: introducing convolutions to vision transformers. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.00009"},{"key":"20_CR65","doi-asserted-by":"crossref","unstructured":"Wu, J., et al.: Towards open vocabulary learning: a survey. IEEE TPAMI (2024)","DOI":"10.1109\/TPAMI.2024.3361862"},{"key":"20_CR66","unstructured":"Wu, X., Lao, Y., Jiang, L., Liu, X., Zhao, H.: Point transformer V2: grouped vector attention and partition-based pooling. In: NeurIPS (2022)"},{"key":"20_CR67","doi-asserted-by":"crossref","unstructured":"Wu, Y., Cheng, X., Zhang, R., Cheng, Z., Zhang, J.: EDA: explicit text-decoupling and dense alignment for 3D visual grounding. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.01843"},{"key":"20_CR68","doi-asserted-by":"crossref","unstructured":"Xiao, Z., Zhang, W., Wang, T., Loy, C.C., Lin, D., Pang, J.: Position-guided point cloud panoptic segmentation transformer. arXiv preprint arXiv:2303.13509 (2023)","DOI":"10.1007\/s11263-024-02162-z"},{"key":"20_CR69","doi-asserted-by":"crossref","unstructured":"Xu, J., Liu, S., Vahdat, A., Byeon, W., Wang, X., De\u00a0Mello, S.: Open-vocabulary panoptic segmentation with text-to-image diffusion models. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.00289"},{"key":"20_CR70","doi-asserted-by":"crossref","unstructured":"Xu, R., Wang, X., Wang, T., Chen, Y., Pang, J., Lin, D.: PointLLM: empowering large language models to understand point clouds. In: ECCV (2024)","DOI":"10.1101\/2024.03.23.586383"},{"key":"20_CR71","doi-asserted-by":"crossref","unstructured":"Yang, J., Ding, R., Deng, W., Wang, Z., Qi, X.: RegioNPLC: regional point-language contrastive learning for open-world 3D scene understanding. In: CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.01874"},{"key":"20_CR72","unstructured":"Yang, Y.Q., et al.: Swin3D: a pretrained transformer backbone for 3D indoor scene understanding. arXiv preprint arXiv:2304.06906 (2023)"},{"key":"20_CR73","unstructured":"Ye, Q., et\u00a0al.: mPLUG-Owl: modularization empowers large language models with multimodality. arXiv:2304.14178 (2023)"},{"key":"20_CR74","doi-asserted-by":"crossref","unstructured":"Yeshwanth, C., Liu, Y.C., Nie\u00dfner, M., Dai, A.: ScanNet++: a high-fidelity dataset of 3D indoor scenes. 
In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.00008"},{"key":"20_CR75","unstructured":"You, H., et al.: FERRET: refer and ground anything anywhere at any granularity. In: ICLR (2024)"},{"key":"20_CR76","doi-asserted-by":"crossref","unstructured":"Zhang, H., et\u00a0al.: LLAVA-grounding: grounded visual chat with large multimodal models. arXiv preprint arXiv:2312.02949 (2023)","DOI":"10.1007\/978-3-031-72775-7_2"},{"key":"20_CR77","doi-asserted-by":"crossref","unstructured":"Zhang, H., Ding, H.: Prototypical matching and open set rejection for zero-shot semantic segmentation. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.00689"},{"key":"20_CR78","unstructured":"Zhang, S., et al.: GPT4ROI: instruction tuning large language model on region-of-interest. arXiv:2307.03601 (2023)"},{"key":"20_CR79","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Gong, Z., Chang, A.X.: Multi3Drefer: grounding text description to multiple 3d objects. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.01397"},{"key":"20_CR80","doi-asserted-by":"crossref","unstructured":"Zhao, H., Jiang, L., Jia, J., Torr, P.H., Koltun, V.: Point transformer. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01595"},{"key":"20_CR81","unstructured":"Zhou, J., Wang, J., Ma, B., Liu, Y.S., Huang, T., Wang, X.: Uni3D: exploring unified 3D representation at scale. In: ICLR (2024)"},{"key":"20_CR82","doi-asserted-by":"crossref","unstructured":"Zhou, Z., Zhang, Y., Foroosh, H.: Panoptic-PolarNet: proposal-free Lidar point cloud panoptic segmentation. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.01299"},{"key":"20_CR83","unstructured":"Zhu, D., Chen, J., Shen, X., Li, X., Elhoseiny, M.: MiniGPT-4: enhancing vision-language understanding with advanced large language models. In: ICLR (2024)"},{"key":"20_CR84","doi-asserted-by":"crossref","unstructured":"Zhu, Z., Ma, X., Chen, Y., Deng, Z., Huang, S., Li, Q.: 3D-Vista: pre-trained transformer for 3D vision and text alignment. 
In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.00272"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72670-5_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T21:20:58Z","timestamp":1732828858000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72670-5_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,30]]},"ISBN":["9783031726699","9783031726705"],"references-count":84,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72670-5_20","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,9,30]]},"assertion":[{"value":"30 September 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}