{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T03:40:17Z","timestamp":1743133217221,"version":"3.40.3"},"publisher-location":"Cham","reference-count":77,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031736605"},{"type":"electronic","value":"9783031736612"}],"license":[{"start":{"date-parts":[[2024,11,10]],"date-time":"2024-11-10T00:00:00Z","timestamp":1731196800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,10]],"date-time":"2024-11-10T00:00:00Z","timestamp":1731196800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73661-2_26","type":"book-chapter","created":{"date-parts":[[2024,11,9]],"date-time":"2024-11-09T11:08:04Z","timestamp":1731150484000},"page":"465-482","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Free-ATM: Harnessing Free Attention Masks for\u00a0Representation Learning on\u00a0Diffusion-Generated Images"],"prefix":"10.1007","author":[{"given":"David Junhao","family":"Zhang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mutian","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jay Zhangjie","family":"Wu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chuhui","family":"Xue","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wenqing","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaoguang","family":"Han","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Song","family":"Bai","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mike Zheng","family":"Shou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,11,10]]},"reference":[{"unstructured":"https:\/\/github.com\/deep-floyd\/IF","key":"26_CR1"},{"unstructured":"Azizi, S., Kornblith, S., Saharia, C., Norouzi, M., Fleet, D.J.: Synthetic data from diffusion models improves ImageNet classification. arXiv preprint arXiv:2304.08466 (2023)","key":"26_CR2"},{"unstructured":"Bachman, P., Hjelm, R.D., Buchwalter, W.: Learning representations by maximizing mutual information across views. In: NeurIPS (2019)","key":"26_CR3"},{"unstructured":"Baevski, A., Hsu, W.N., Xu, Q., Babu, A., Gu, J., Auli, M.: data2vec: a general framework for self-supervised learning in speech, vision and language. arXiv preprint arXiv:2202.03555 (2022)","key":"26_CR4"},{"unstructured":"Bao, H., Dong, L., Piao, S., Wei, F.: BEit: BERT pre-training of image transformers. In: ICLR (2022)","key":"26_CR5"},{"doi-asserted-by":"crossref","unstructured":"Besnier, V., Jain, H., Bursuc, A., Cord, M., P\u00e9rez, P.: This dataset does not exist: training models from generated images. In: ICASSP (2020)","key":"26_CR6","DOI":"10.1109\/ICASSP40776.2020.9053146"},{"unstructured":"Brock, A., Donahue, J., Simonyan, K.: Large scale GAN training for high fidelity natural image synthesis. In: ICLR (2019)","key":"26_CR7"},{"doi-asserted-by":"crossref","unstructured":"Changpinyo, S., Sharma, P., Ding, N., Soricut, R.: Conceptual 12M: pushing web-scale image-text pre-training to recognize long-tail visual concepts. In: CVPR (2021)","key":"26_CR8","DOI":"10.1109\/CVPR46437.2021.00356"},{"unstructured":"Chen, T., Kornblith, S., Norouzi, M., Hinton, G.: A simple framework for contrastive learning of visual representations. In: ICML (2020)","key":"26_CR9"},{"unstructured":"Chen, X., Fan, H., Girshick, R., He, K.: Improved baselines with momentum contrastive learning. arXiv preprint arXiv:2003.04297 (2020)","key":"26_CR10"},{"doi-asserted-by":"crossref","unstructured":"Chen, X., He, K.: Exploring simple siamese representation learning. In: CVPR (2021)","key":"26_CR11","DOI":"10.1109\/CVPR46437.2021.01549"},{"doi-asserted-by":"crossref","unstructured":"Cordts, M., et al.: The cityscapes dataset for semantic urban scene understanding. In: CVPR (2016)","key":"26_CR12","DOI":"10.1109\/CVPR.2016.350"},{"doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: ImageNet: a large-scale hierarchical image database. In: CVPR (2009)","key":"26_CR13","DOI":"10.1109\/CVPR.2009.5206848"},{"unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat GANs on image synthesis. In: NeurIPS (2021)","key":"26_CR14"},{"unstructured":"Dosovitskiy, A., et al.: An image is worth $$16\\times 16$$ words: transformers for image recognition at scale. In: ICLR (2021)","key":"26_CR15"},{"doi-asserted-by":"crossref","unstructured":"Esser, P., Rombach, R., Ommer, B.: Taming transformers for high-resolution image synthesis. In: CVPR (2021)","key":"26_CR16","DOI":"10.1109\/CVPR46437.2021.01268"},{"key":"26_CR17","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham, M., Van Gool, L., Williams, C.K., Winn, J., Zisserman, A.: The PASCAL visual object classes (VOC) challenge. IJCV 88, 303\u2013338 (2010)","journal-title":"IJCV"},{"doi-asserted-by":"crossref","unstructured":"Girshick, R.: Fast R-CNN. In: ICCV (2015)","key":"26_CR18","DOI":"10.1109\/ICCV.2015.169"},{"issue":"11","key":"26_CR19","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1145\/3422622","volume":"63","author":"I Goodfellow","year":"2020","unstructured":"Goodfellow, I., et al.: Generative adversarial networks. Commun. ACM 63(11), 139\u2013144 (2020)","journal-title":"Commun. ACM"},{"unstructured":"Grill, J.B., et al.: Bootstrap your own latent - a new approach to self-supervised learning. In: NeurIPS (2020)","key":"26_CR20"},{"doi-asserted-by":"crossref","unstructured":"He, K., Chen, X., Xie, S., Li, Y., Doll\u00e1r, P., Girshick, R.: Masked autoencoders are scalable vision learners. In: CVPR (2022)","key":"26_CR21","DOI":"10.1109\/CVPR52688.2022.01553"},{"doi-asserted-by":"crossref","unstructured":"He, K., Fan, H., Wu, Y., Xie, S., Girshick, R.: Momentum contrast for unsupervised visual representation learning. In: CVPR (2020)","key":"26_CR22","DOI":"10.1109\/CVPR42600.2020.00975"},{"doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Dollar, P., Girshick, R.: Mask R-CNN. In: ICCV (2017)","key":"26_CR23","DOI":"10.1109\/ICCV.2017.322"},{"doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: CVPR (2016)","key":"26_CR24","DOI":"10.1109\/CVPR.2016.90"},{"unstructured":"He, R., et al.: Is synthetic data from generative models ready for image recognition? In: ICLR (2023)","key":"26_CR25"},{"doi-asserted-by":"crossref","unstructured":"H\u00e9naff, O.J., Koppula, S., Alayrac, J.B., Van\u00a0den Oord, A., Vinyals, O., Carreira, J.: Efficient visual pretraining with contrastive detection. In: ICCV (2021)","key":"26_CR26","DOI":"10.1109\/ICCV48922.2021.00993"},{"key":"26_CR27","doi-asserted-by":"publisher","first-page":"123","DOI":"10.1007\/978-3-031-19812-0_8","volume-title":"ECCV 2022","author":"OJ H\u00e9naff","year":"2022","unstructured":"H\u00e9naff, O.J., et al.: Object discovery and representation networks. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13687, pp. 123\u2013143. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19812-0_8"},{"unstructured":"H\u00e9naff, O.J., et al.: Data-efficient image recognition with contrastive predictive coding. arXiv preprint arXiv:1905.09272 (2019)","key":"26_CR28"},{"unstructured":"Hertz, A., Mokady, R., Tenenbaum, J., Aberman, K., Pritch, Y., Cohen-Or, D.: Prompt-to-prompt image editing with cross attention control. arXiv preprint arXiv:2208.01626 (2022)","key":"26_CR29"},{"unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. In: NeurIPS (2020)","key":"26_CR30"},{"unstructured":"Ho, J., Salimans, T.: Classifier-free diffusion guidance. arXiv preprint arXiv:2207.12598 (2022)","key":"26_CR31"},{"unstructured":"Jahanian, A., Puig, X., Tian, Y., Isola, P.: Generative models as a data source for multiview representation learning. In: ICLR (2022)","key":"26_CR32"},{"issue":"12","key":"26_CR33","doi-asserted-by":"publisher","first-page":"4217","DOI":"10.1109\/TPAMI.2020.2970919","volume":"43","author":"T Karras","year":"2021","unstructured":"Karras, T., Laine, S., Aila, T.: A style-based generator architecture for generative adversarial networks. TPAMI 43(12), 4217\u20134228 (2021)","journal-title":"TPAMI"},{"unstructured":"Kim, W., Son, B., Kim, I.: ViLT: vision-and-language transformer without convolution or region supervision. In: ICML (2021)","key":"26_CR34"},{"unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114 (2013)","key":"26_CR35"},{"doi-asserted-by":"crossref","unstructured":"Kirillov, A., et al.: Segment anything. In: ICCV (2023)","key":"26_CR36","DOI":"10.1109\/ICCV51070.2023.00371"},{"unstructured":"Kr\u00e4henb\u00fchl, P., Koltun, V.: Efficient inference in fully connected CRFs with Gaussian edge potentials. In: NeurIPS (2011)","key":"26_CR37"},{"key":"26_CR38","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1007\/s11263-016-0981-7","volume":"123","author":"R Krishna","year":"2017","unstructured":"Krishna, R., et al.: Visual genome: connecting language and vision using crowdsourced dense image annotations. Int. J. Comput. Vis. 123, 32\u201373 (2017)","journal-title":"Int. J. Comput. Vis."},{"doi-asserted-by":"crossref","unstructured":"Li, D., et al.: BigDatasetGAN: synthesizing ImageNet with pixel-wise annotations. In: CVPR (2022)","key":"26_CR39","DOI":"10.1109\/CVPR52688.2022.02064"},{"unstructured":"Li, J., Li, D., Xiong, C., Hoi, S.: BLIP: bootstrapping language-image pre-training for unified vision-language understanding and generation. In: ICML (2022)","key":"26_CR40"},{"doi-asserted-by":"crossref","unstructured":"Li, L.H., et al.: Grounded language-image pre-training. In: CVPR (2022)","key":"26_CR41","DOI":"10.1109\/CVPR52729.2023.02240"},{"doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Dollar, P., Girshick, R., He, K., Hariharan, B., Belongie, S.: Feature pyramid networks for object detection. In: CVPR (2017)","key":"26_CR42","DOI":"10.1109\/CVPR.2017.106"},{"key":"26_CR43","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"doi-asserted-by":"crossref","unstructured":"Liu, Z., Stent, S., Li, J., Gideon, J., Han, S.: LocTex: learning data-efficient visual representations from localized textual supervision. In: ICCV (2021)","key":"26_CR44","DOI":"10.1109\/ICCV48922.2021.00217"},{"unstructured":"Locatello, F., et al.: Object-centric learning with slot attention. In: NeurIPS (2020)","key":"26_CR45"},{"doi-asserted-by":"crossref","unstructured":"Misra, I., van der Maaten, L.: Self-supervised learning of pretext-invariant representations. In: CVPR (2020)","key":"26_CR46","DOI":"10.1109\/CVPR42600.2020.00674"},{"unstructured":"Nichol, A., et al.: GLIDE: towards photorealistic image generation and editing with text-guided diffusion models. In: ICML (2022)","key":"26_CR47"},{"unstructured":"van den Oord, A., Li, Y., Vinyals, O.: Representation learning with contrastive predictive coding. arXiv preprint arXiv:1807.03748 (2018)","key":"26_CR48"},{"unstructured":"Ordonez, V., Kulkarni, G., Berg, T.: Im2Text: describing images using 1 million captioned photographs. In: NeurIPS (2011)","key":"26_CR49"},{"doi-asserted-by":"crossref","unstructured":"Orekondy, T., Schiele, B., Fritz, M.: Towards a visual privacy advisor: understanding and predicting privacy risks in images. In: ICCV (2017)","key":"26_CR50","DOI":"10.1109\/ICCV.2017.398"},{"unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: ICML (2021)","key":"26_CR51"},{"unstructured":"Ramesh, A., et al.: Zero-shot text-to-image generation. In: ICML (2021)","key":"26_CR52"},{"doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: CVPR (2022)","key":"26_CR53","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"26_CR54","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"234","DOI":"10.1007\/978-3-319-24574-4_28","volume-title":"Medical Image Computing and Computer-Assisted Intervention \u2013 MICCAI 2015","author":"O Ronneberger","year":"2015","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-Net: convolutional networks for biomedical image segmentation. In: Navab, N., Hornegger, J., Wells, W.M., Frangi, A.F. (eds.) MICCAI 2015. LNCS, vol. 9351, pp. 234\u2013241. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-24574-4_28"},{"unstructured":"Saharia, C., et al.: Photorealistic text-to-image diffusion models with deep language understanding. In: NeurIPS (2022)","key":"26_CR55"},{"doi-asserted-by":"crossref","unstructured":"Sariyildiz, M.B., Alahari, K., Larlus, D., Kalantidis, Y.: Fake it till you make it: learning transferable representations from synthetic ImageNet clones. In: CVPR (2023)","key":"26_CR56","DOI":"10.1109\/CVPR52729.2023.00774"},{"doi-asserted-by":"crossref","unstructured":"Sharma, P., Ding, N., Goodman, S., Soricut, R.: Conceptual captions: a cleaned, hypernymed, image alt-text dataset for automatic image captioning. In: ACL (2018)","key":"26_CR57","DOI":"10.18653\/v1\/P18-1238"},{"doi-asserted-by":"crossref","unstructured":"Shi, Y., Xue, C., Pan, J., Zhang, W., Tan, V.Y., Bai, S.: DragDiffusion: harnessing diffusion models for interactive point-based image editing. arXiv preprint arXiv:2306.14435 (2023)","key":"26_CR58","DOI":"10.1109\/CVPR52733.2024.00844"},{"unstructured":"Song, J., Meng, C., Ermon, S.: Denoising diffusion implicit models. In: ICLR (2021)","key":"26_CR59"},{"key":"26_CR60","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"776","DOI":"10.1007\/978-3-030-58621-8_45","volume-title":"Computer Vision \u2013 ECCV 2020","author":"Y Tian","year":"2020","unstructured":"Tian, Y., Krishnan, D., Isola, P.: Contrastive multiview coding. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12356, pp. 776\u2013794. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58621-8_45"},{"unstructured":"Trabucco, B., Doherty, K., Gurinas, M., Salakhutdinov, R.: Effective data augmentation with diffusion models. arXiv preprint arXiv:2302.07944 (2023)","key":"26_CR61"},{"doi-asserted-by":"crossref","unstructured":"Wang, A.J., Zhou, P., Shou, M.Z., Yan, S.: Position-guided text prompt for vision-language pre-training. In: CVPR (2023)","key":"26_CR62","DOI":"10.1109\/CVPR52729.2023.02226"},{"doi-asserted-by":"crossref","unstructured":"Wang, H., Song, K., Fan, J., Wang, Y., Xie, J., Zhang, Z.: Hard patches mining for masked image modeling. In: CVPR (2023)","key":"26_CR63","DOI":"10.1109\/CVPR52729.2023.01000"},{"doi-asserted-by":"crossref","unstructured":"Wang, X., Zhang, R., Shen, C., Kong, T., Li, L.: Dense contrastive learning for self-supervised visual pre-training. In: CVPR (2021)","key":"26_CR64","DOI":"10.1109\/CVPR46437.2021.00304"},{"doi-asserted-by":"crossref","unstructured":"Wei, C., Fan, H., Xie, S., Wu, C.Y., Yuille, A., Feichtenhofer, C.: Masked feature prediction for self-supervised visual pre-training. In: CVPR (2022)","key":"26_CR65","DOI":"10.1109\/CVPR52688.2022.01426"},{"unstructured":"Wu, W., et al.: DatasetDM: synthesizing data with perception annotations using diffusion models. In: NeurIPS (2023)","key":"26_CR66"},{"doi-asserted-by":"crossref","unstructured":"Wu, W., Zhao, Y., Shou, M.Z., Zhou, H., Shen, C.: DiffuMask: synthesizing images with pixel-level annotations for semantic segmentation using diffusion models. arXiv preprint arXiv:2303.11681 (2023)","key":"26_CR67","DOI":"10.1109\/ICCV51070.2023.00117"},{"doi-asserted-by":"crossref","unstructured":"Wu, Z., Xiong, Y., Yu, S.X., Lin, D.: Unsupervised feature learning via non-parametric instance discrimination. In: CVPR (2018)","key":"26_CR68","DOI":"10.1109\/CVPR.2018.00393"},{"key":"26_CR69","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"432","DOI":"10.1007\/978-3-030-01228-1_26","volume-title":"Computer Vision \u2013 ECCV 2018","author":"T Xiao","year":"2018","unstructured":"Xiao, T., Liu, Y., Zhou, B., Jiang, Y., Sun, J.: Unified perceptual parsing for scene understanding. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11209, pp. 432\u2013448. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01228-1_26"},{"doi-asserted-by":"crossref","unstructured":"Xie, Z., et al.: SimMIM: a simple framework for masked image modeling. In: CVPR (2022)","key":"26_CR70","DOI":"10.1109\/CVPR52688.2022.00943"},{"unstructured":"Yang, L., Xu, X., Kang, B., Shi, Y., Zhao, H.: FreeMask: synthetic images with dense annotations make stronger segmentation models. In: NeurIPS (2023)","key":"26_CR71"},{"doi-asserted-by":"crossref","unstructured":"Ye, M., Zhang, X., Yuen, P.C., Chang, S.F.: Unsupervised embedding learning via invariant and spreading instance feature. In: CVPR (2019)","key":"26_CR72","DOI":"10.1109\/CVPR.2019.00637"},{"doi-asserted-by":"crossref","unstructured":"Zhang, Y., et al.: DatasetGAN: efficient labeled data factory with minimal human effort. In: CVPR (2021)","key":"26_CR73","DOI":"10.1109\/CVPR46437.2021.01001"},{"unstructured":"Zhao, B., Bilen, H.: Synthesizing informative training samples with GAN. In: NeurIPS 2022 Workshop on Synthetic Data for Empowering ML Research (2022)","key":"26_CR74"},{"doi-asserted-by":"crossref","unstructured":"Zhao, W., Rao, Y., Liu, Z., Liu, B., Zhou, J., Lu, J.: Unleashing text-to-image diffusion models for visual perception. arXiv preprint arXiv:2303.02153 (2023)","key":"26_CR75","DOI":"10.1109\/ICCV51070.2023.00527"},{"key":"26_CR76","doi-asserted-by":"publisher","first-page":"302","DOI":"10.1007\/s11263-018-1140-0","volume":"127","author":"B Zhou","year":"2019","unstructured":"Zhou, B., et al.: Semantic understanding of scenes through the ADE20K dataset. IJCV 127, 302\u2013321 (2019)","journal-title":"IJCV"},{"unstructured":"Zhou, J., et al.: iBOT: image BERT pre-training with online tokenizer. In: ICLR (2022)","key":"26_CR77"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73661-2_26","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,9]],"date-time":"2024-11-09T12:09:52Z","timestamp":1731154192000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73661-2_26"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,10]]},"ISBN":["9783031736605","9783031736612"],"references-count":77,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73661-2_26","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,10]]},"assertion":[{"value":"10 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}