{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,17]],"date-time":"2026-03-17T19:33:11Z","timestamp":1773775991034,"version":"3.50.1"},"publisher-location":"Cham","reference-count":67,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031729850","type":"print"},{"value":"9783031729867","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,2]],"date-time":"2024-11-02T00:00:00Z","timestamp":1730505600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,2]],"date-time":"2024-11-02T00:00:00Z","timestamp":1730505600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72986-7_12","type":"book-chapter","created":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T05:07:53Z","timestamp":1730437673000},"page":"198-215","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["Contrastive Region Guidance: Improving Grounding in\u00a0Vision-Language Models Without Training"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-5076-5115","authenticated-orcid":false,"given":"David","family":"Wan","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1558-6169","authenticated-orcid":false,"given":"Jaemin","family":"Cho","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6689-505X","authenticated-orcid":false,"given":"Elias","family":"Stengel-Eskin","sequence":"additional","affiliation":[]},{"given":"Mohit","family":"Bansal","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,2]]},"reference":[{"key":"12_CR1","doi-asserted-by":"crossref","unstructured":"Antol, S., et al.: VQA: visual question answering. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2425\u20132433 (2015)","DOI":"10.1109\/ICCV.2015.279"},{"key":"12_CR2","unstructured":"Bahng, H., Jahanian, A., Sankaranarayanan, S., Isola, P.: Exploring visual prompts for adapting large-scale models (2022)"},{"key":"12_CR3","unstructured":"Bai, J., et al.: Qwen-VL: a versatile vision-language model for understanding, localization, text reading, and beyond. arXiv preprint arXiv:2308.12966 (2023)"},{"key":"12_CR4","doi-asserted-by":"crossref","unstructured":"Bai, Y., et al.: Sequential modeling enables scalable learning for large vision models (2023)","DOI":"10.1109\/CVPR52733.2024.02157"},{"key":"12_CR5","unstructured":"Bar, A., Gandelsman, Y., Darrell, T., Globerson, A., Efros, A.A.: Visual Prompting via Image Inpainting. In: NeurIPS (2022)"},{"key":"12_CR6","unstructured":"Cai, M., et al.: Making large multimodal models understand arbitrary visual prompts (2023). http:\/\/arxiv.org\/abs\/2312.00784"},{"key":"12_CR7","unstructured":"Chen, K., Zhang, Z., Zeng, W., Zhang, R., Zhu, F., Zhao, R.: Shikra: unleashing multimodal LLM\u2019s referential dialogue magic. arXiv preprint arXiv:2306.15195 (2023)"},{"key":"12_CR8","unstructured":"Chen, X., et\u00a0al.: PaLI-X: on scaling up a multilingual vision and language model. 
arXiv preprint arXiv:2305.18565 (2023)"},{"key":"12_CR9","doi-asserted-by":"crossref","unstructured":"Chen, X., Zhao, Z., Zhang, Y., Duan, M., Qi, D., Zhao, H.: FocalClick: towards practical interactive image segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 1300\u20131309 (2022)","DOI":"10.1109\/CVPR52688.2022.00136"},{"key":"12_CR10","unstructured":"Chen, Z., Zhao, Z., Luo, H., Yao, H., Li, B., Zhou, J.: HALC: object hallucination reduction via adaptive focal-contrast decoding. arXiv preprint arXiv:2403.00425 (2024)"},{"key":"12_CR11","doi-asserted-by":"crossref","unstructured":"Cheng, T., Song, L., Ge, Y., Liu, W., Wang, X., Shan, Y.: YOLO-world: real-time open-vocabulary object detection. In: CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.01599"},{"key":"12_CR12","doi-asserted-by":"crossref","unstructured":"Cho, J.W., Kim, D.J., Ryu, H., Kweon, I.S.: Generative bias for robust visual question answering. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11681\u201311690 (2023)","DOI":"10.1109\/CVPR52729.2023.01124"},{"key":"12_CR13","unstructured":"Dai, W., et al.: InstructBLIP: towards general-purpose vision-language models with instruction tuning. In: Thirty-Seventh Conference on Neural Information Processing Systems (2023). https:\/\/openreview.net\/forum?id=vvoWPYqZJA"},{"key":"12_CR14","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth 16$$\\times $$16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"12_CR15","doi-asserted-by":"crossref","unstructured":"Goyal, Y., Khot, T., Summers-Stay, D., Batra, D., Parikh, D.: Making the V in VQA matter: elevating the role of image understanding in visual question answering. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6904\u20136913 (2017)","DOI":"10.1109\/CVPR.2017.670"},{"key":"12_CR16","unstructured":"He, M., et al.: Efficient multimodal learning from data-centric perspective. arXiv preprint arXiv:2402.11530 (2024)"},{"key":"12_CR17","unstructured":"Ho, J., Salimans, T.: Classifier-free diffusion guidance. In: NeurIPS 2021 Workshop on Deep Generative Models and Downstream Applications (2021). https:\/\/openreview.net\/forum?id=qw8AKxfYbI"},{"key":"12_CR18","doi-asserted-by":"publisher","unstructured":"Honnibal, M., Montani, I., Landeghem, S.V., Boyd, A.: spaCy: industrial-strength natural language processing in python (2020). https:\/\/doi.org\/10.5281\/zenodo.1212303, https:\/\/spacy.io","DOI":"10.5281\/zenodo.1212303"},{"key":"12_CR19","unstructured":"Hsieh, C.Y., Zhang, J., Ma, Z., Kembhavi, A., Krishna, R.: SugarCrepe: fixing hackable benchmarks for vision-language compositionality. In: Thirty-Seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track (2023)"},{"key":"12_CR20","doi-asserted-by":"crossref","unstructured":"Kamath, A., Singh, M., LeCun, Y., Misra, I., Synnaeve, G., Carion, N.: MDETR\u2013modulated detection for end-to-end multi-modal understanding. arXiv preprint arXiv:2104.12763 (2021)","DOI":"10.1109\/ICCV48922.2021.00180"},{"key":"12_CR21","doi-asserted-by":"crossref","unstructured":"Kamath, A., Hessel, J., Chang, K.W.: What\u2019s \u201cup\u201d with vision-language models? Investigating their struggle with spatial reasoning. In: Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, pp. 
9161\u20139175 (2023)","DOI":"10.18653\/v1\/2023.emnlp-main.568"},{"key":"12_CR22","doi-asserted-by":"crossref","unstructured":"Kazemzadeh, S., Ordonez, V., Matten, M., Berg, T.: ReferitGame: referring to objects in photographs of natural scenes. In: Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 787\u2013798 (2014)","DOI":"10.3115\/v1\/D14-1086"},{"key":"12_CR23","doi-asserted-by":"publisher","unstructured":"Khattak, M.U., Rasheed, H., Maaz, M., Khan, S., Khan, F.S.: MaPLe: multi-modal prompt learning. In: CVPR (2023). https:\/\/doi.org\/10.1109\/CVPR52729.2023.01832","DOI":"10.1109\/CVPR52729.2023.01832"},{"key":"12_CR24","doi-asserted-by":"crossref","unstructured":"Kirillov, A., et al.: Segment anything. arXiv:2304.02643 (2023)","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"12_CR25","doi-asserted-by":"crossref","unstructured":"Kornblith, S., Li, L., Wang, Z., Nguyen, T.: Guiding image captioning models toward more specific captions. In: ICCV (2023). http:\/\/arxiv.org\/abs\/2307.16686","DOI":"10.1109\/ICCV51070.2023.01400"},{"key":"12_CR26","unstructured":"Leng, S., et al.: Mitigating object hallucinations in large vision-language models through visual contrastive decoding (2023). http:\/\/arxiv.org\/abs\/2311.16922"},{"key":"12_CR27","unstructured":"Li, J., Li, D., Savarese, S., Hoi, S.: BLIP-2: bootstrapping language-image pre-training with frozen image encoders and large language models. arXiv preprint arXiv:2301.12597 (2023)"},{"key":"12_CR28","doi-asserted-by":"crossref","unstructured":"Li, L.H., et al.: Grounded language-image pre-training. In: CVPR (2022)","DOI":"10.1109\/CVPR52729.2023.02240"},{"key":"12_CR29","doi-asserted-by":"crossref","unstructured":"Li, X.L., et al.: Contrastive decoding: open-ended text generation as optimization. In: Rogers, A., Boyd-Graber, J., Okazaki, N. (eds.) Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers) (2023)","DOI":"10.18653\/v1\/2023.acl-long.687"},{"key":"12_CR30","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"12_CR31","unstructured":"Liu, H., et al.: LLaVA-next: improved reasoning, OCR, and world knowledge (2024). https:\/\/llava-vl.github.io\/blog\/2024-01-30-llava-next\/"},{"key":"12_CR32","unstructured":"Liu, H., Li, C., Wu, Q., Lee, Y.J.: Visual instruction tuning. In: Thirty-seventh Conference on Neural Information Processing Systems (2023). https:\/\/openreview.net\/forum?id=w0H2xGHlkw"},{"key":"12_CR33","unstructured":"Liu, H., Li, C., Wu, Q., Lee, Y.J.: Visual instruction tuning. Adv. Neural Inf. Process. Syst. 36 (2024)"},{"key":"12_CR34","doi-asserted-by":"crossref","unstructured":"Liu, S., et\u00a0al.: Grounding DINO: marrying DINO with grounded pre-training for open-set object detection. arXiv preprint arXiv:2303.05499 (2023)","DOI":"10.1007\/978-3-031-72970-6_3"},{"key":"12_CR35","doi-asserted-by":"crossref","unstructured":"Liu, Y., Guo, Y., Yin, J., Song, X., Liu, W., Nie, L., Zhang, M.: Answer questions with right image regions: a visual attention regularization approach. 
ACM Trans. Multimed. Comput. Commun. Appl. (TOMM) 18(4), 1\u201318 (2022)","DOI":"10.1145\/3498340"},{"key":"12_CR36","doi-asserted-by":"crossref","unstructured":"Ma, Z., Hong, J., Gul, M.O., Gandhi, M., Gao, I., Krishna, R.: CREPE: can vision-language foundation models reason compositionally? In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10910\u201310921 (2023)","DOI":"10.1109\/CVPR52729.2023.01050"},{"key":"12_CR37","doi-asserted-by":"crossref","unstructured":"Mao, J., Huang, J., Toshev, A., Camburu, O.M., Yuille, A.L., Murphy, K.P.: Generation and comprehension of unambiguous object descriptions. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 11\u201320 (2015). https:\/\/api.semanticscholar.org\/CorpusID:8745888","DOI":"10.1109\/CVPR.2016.9"},{"key":"12_CR38","unstructured":"O\u2019Brien, S., Lewis, M.: Contrastive decoding improves reasoning in large language models (2023)"},{"key":"12_CR39","unstructured":"Achiam, J., et al.: GPT-4 technical report (2023)"},{"issue":"1","key":"12_CR40","doi-asserted-by":"publisher","first-page":"74","DOI":"10.1007\/s11263-016-0965-7","volume":"123","author":"BA Plummer","year":"2017","unstructured":"Plummer, B.A., Wang, L., Cervantes, C.M., Caicedo, J.C., Hockenmaier, J., Lazebnik, S.: Flickr30k entities: collecting region-to-phrase correspondences for richer image-to-sentence models. IJCV 123(1), 74\u201393 (2017)","journal-title":"IJCV"},{"key":"12_CR41","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: Meila, M., Zhang, T. (eds.) Proceedings of the 38th International Conference on Machine Learning, ICML 2021, 18\u201324 July 2021, Virtual Event. Proceedings of Machine Learning Research, vol.\u00a0139, pp. 8748\u20138763. PMLR (2021). http:\/\/proceedings.mlr.press\/v139\/radford21a.html"},{"key":"12_CR42","unstructured":"Ray, A., Radenovic, F., Dubey, A., Plummer, B., Krishna, R., Saenko, K.: Cola: a benchmark for compositional text-to-image retrieval. Adv. Neural Inf. Process. Syst. 36 (2024)"},{"key":"12_CR43","unstructured":"Ren, T., et al.: Grounded SAM: assembling open-world models for diverse visual tasks (2024)"},{"key":"12_CR44","doi-asserted-by":"publisher","unstructured":"Ribeiro, M., Singh, S., Guestrin, C.: \u201cWhy should I trust you?\u201d: explaining the predictions of any classifier. In: DeNero, J., Finlayson, M., Reddy, S. (eds.) Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Demonstrations, pp. 97\u2013101. Association for Computational Linguistics, San Diego (2016). https:\/\/doi.org\/10.18653\/v1\/N16-3020, https:\/\/aclanthology.org\/N16-3020","DOI":"10.18653\/v1\/N16-3020"},{"key":"12_CR45","unstructured":"Saharia, C., et al.: Photorealistic text-to-image diffusion models with deep language understanding. In: Oh, A.H., Agarwal, A., Belgrave, D., Cho, K. (eds.) Advances in Neural Information Processing Systems (2022). https:\/\/openreview.net\/forum?id=08Yk-n5l2Al"},{"key":"12_CR46","unstructured":"Salesforce AI Research: Xgen-mm-phi3-mini-instruct model card (2024). https:\/\/huggingface.co\/Salesforce\/xgen-mm-phi3-mini-instruct-r-v1"},{"key":"12_CR47","unstructured":"Sanchez, G., Fan, H., Spangher, A., Levi, E., Ammanamanchi, P.S., Biderman, S.: Stay on topic with classifier-free guidance. 
arXiv preprint arXiv:2306.17806 (2023)"},{"key":"12_CR48","doi-asserted-by":"crossref","unstructured":"Selvaraju, R.R., et al.: Taking a hint: leveraging explanations to make vision and language models more grounded. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2591\u20132600 (2019)","DOI":"10.1109\/ICCV.2019.00268"},{"key":"12_CR49","doi-asserted-by":"crossref","unstructured":"Shi, W., Han, X., Lewis, M., Tsvetkov, Y., Zettlemoyer, L., tau Yih, S.W.: Trusting your evidence: hallucinate less with context-aware decoding (2023)","DOI":"10.18653\/v1\/2024.naacl-short.69"},{"key":"12_CR50","doi-asserted-by":"crossref","unstructured":"Shtedritski, A., Rupprecht, C., Vedaldi, A.: What does clip know about a red circle? Visual prompt engineering for vlms. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 11987\u201311997 (2023)","DOI":"10.1109\/ICCV51070.2023.01101"},{"key":"12_CR51","doi-asserted-by":"crossref","unstructured":"Singh, A., et al.: FLAVA: a foundational language and vision alignment model. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.01519"},{"key":"12_CR52","doi-asserted-by":"crossref","unstructured":"Sun, Z., et al.: Alpha-CLIP: a CLIP model focusing on wherever you want (2023)","DOI":"10.1109\/CVPR52733.2024.01237"},{"key":"12_CR53","doi-asserted-by":"crossref","unstructured":"Thrush, T., et al.: Winoground: probing vision and language models for visio-linguistic compositionality. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5238\u20135248 (2022)","DOI":"10.1109\/CVPR52688.2022.00517"},{"key":"12_CR54","doi-asserted-by":"publisher","unstructured":"Wang, S., et al.: Imagen editor and editbench: advancing and evaluating text-guided image inpainting. In: 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 18359\u201318369. IEEE Computer Society, Los Alamitos (2023). https:\/\/doi.org\/10.1109\/CVPR52729.2023.01761, https:\/\/doi.ieeecomputersociety.org\/10.1109\/CVPR52729.2023.01761","DOI":"10.1109\/CVPR52729.2023.01761"},{"key":"12_CR55","unstructured":"Wu, J., Mooney, R.: Self-critical reasoning for robust visual question answering. Adv. Neural Inf. Process. Syst. 32 (2019)"},{"key":"12_CR56","unstructured":"Yang, J., Zhang, H., Li, F., Zou, X., Li, C., Gao, J.: Set-of-mark prompting unleashes extraordinary visual grounding in GPT-4V (2023). http:\/\/arxiv.org\/abs\/2310.11441"},{"key":"12_CR57","unstructured":"Yao, Y., Zhang, A., Zhang, Z., Liu, Z., Chua, T.S., Sun, M.: CPT: colorful prompt tuning for pre-trained vision-language models (2021). http:\/\/arxiv.org\/abs\/2109.11797"},{"key":"12_CR58","unstructured":"Yarom, M., et al.: What you see is what you read? Improving text-image alignment evaluation. In: Thirty-seventh Conference on Neural Information Processing Systems (2023). https:\/\/openreview.net\/forum?id=j5AoleAIru"},{"key":"12_CR59","first-page":"17057","volume":"35","author":"Z Ying","year":"2022","unstructured":"Ying, Z., Hase, P., Bansal, M.: VisFIS: visual feature importance supervision with right-for-the-right-reason objectives. Adv. Neural. Inf. Process. Syst. 35, 17057\u201317072 (2022)","journal-title":"Adv. Neural. Inf. Process. 
Syst."},{"key":"12_CR60","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1162\/tacl_a_00166","volume":"2","author":"P Young","year":"2014","unstructured":"Young, P., Lai, A., Hodosh, M., Hockenmaier, J.: From image descriptions to visual denotations: new similarity metrics for semantic inference over event descriptions. Trans. Assoc. Comput. Linguist. 2, 67\u201378 (2014)","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"12_CR61","unstructured":"Zellers, R., et al.: MERLOT: multimodal neural script knowledge models. In: NeurIPS (2021). http:\/\/arxiv.org\/abs\/2106.02636"},{"key":"12_CR62","unstructured":"Zhang, H., et al.: GLIPv2: unifying localization and vision-language understanding. arXiv preprint arXiv:2206.05836 (2022)"},{"key":"12_CR63","doi-asserted-by":"crossref","unstructured":"Zhang, P., Goyal, Y., Summers-Stay, D., Batra, D., Parikh, D.: Yin and Yang: balancing and answering binary visual questions. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5014\u20135022 (2016)","DOI":"10.1109\/CVPR.2016.542"},{"key":"12_CR64","unstructured":"Zhang, S., et al.: GPT4RoI: instruction tuning large language model on region-of-interest (2023)"},{"key":"12_CR65","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Qian, S., Peng, B., Liu, S., Jia, J.: Prompt highlighter: interactive control for multi-modal LLMs. arXiv preprint arXiv:2312.04302 (2023)","DOI":"10.1109\/CVPR52733.2024.01255"},{"key":"12_CR66","unstructured":"Zhao, L., Deng, Y., Zhang, W., Gu, Q.: Mitigating object hallucination in large vision-language models via classifier-free guidance (2024)"},{"key":"12_CR67","unstructured":"Zou, X., et al.: Segment everything everywhere all at once. In: Thirty-seventh Conference on Neural Information Processing Systems (2023). 
https:\/\/openreview.net\/forum?id=UHBrWeFWlL"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72986-7_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,30]],"date-time":"2024-11-30T16:45:42Z","timestamp":1732985142000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72986-7_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,2]]},"ISBN":["9783031729850","9783031729867"],"references-count":67,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72986-7_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,2]]},"assertion":[{"value":"2 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}
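
The block above is a single work record as returned by the public Crossref REST API (GET https://api.crossref.org/works/{DOI}). As a quick orientation to its structure, here is a minimal Python sketch, not part of the record itself, that fetches the same work by DOI and reads out a few of the fields visible above; it assumes network access and uses only the standard library. The User-Agent contact address is a hypothetical placeholder (Crossref recommends polite clients identify themselves this way).

import json
import urllib.request

# DOI of the chapter described by the record above.
DOI = "10.1007/978-3-031-72986-7_12"

# Crossref's public works endpoint; the response envelope is
# {"status": "ok", "message-type": "work", ..., "message": {...}}.
req = urllib.request.Request(
    f"https://api.crossref.org/works/{DOI}",
    # Hypothetical contact address for Crossref's "polite" usage pool.
    headers={"User-Agent": "example-script/0.1 (mailto:you@example.org)"},
)
with urllib.request.urlopen(req) as resp:
    work = json.load(resp)["message"]

# A few of the fields shown in the record: note that "title" and
# "container-title" are lists, and each author is an object with
# "given"/"family" names plus an optional ORCID iD.
print(work["title"][0])
print(", ".join(f'{a["given"]} {a["family"]}' for a in work.get("author", [])))
print(work["DOI"], work.get("page"))
print("; ".join(work.get("container-title", [])))
print(len(work.get("reference", [])), "references deposited")

Run as-is, this should print the chapter title, the four authors, the DOI with its page range, the LNCS/ECCV 2024 container titles, and the count of 67 deposited references, matching the fields in the record above.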