{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T16:01:27Z","timestamp":1778083287560,"version":"3.51.4"},"publisher-location":"Cham","reference-count":78,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031729454","type":"print"},{"value":"9783031729461","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,10,2]],"date-time":"2024-10-02T00:00:00Z","timestamp":1727827200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,2]],"date-time":"2024-10-02T00:00:00Z","timestamp":1727827200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72946-1_19","type":"book-chapter","created":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T19:02:08Z","timestamp":1727809328000},"page":"330-348","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":69,"title":["SparseCtrl: Adding Sparse Controls to\u00a0Text-to-Video Diffusion Models"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-1516-4083","authenticated-orcid":false,"given":"Yuwei","family":"Guo","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1417-1938","authenticated-orcid":false,"given":"Ceyuan","family":"Yang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1004-7753","authenticated-orcid":false,"given":"Anyi","family":"Rao","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8996-7327","authenticated-orcid":false,"given":"Maneesh","family":"Agrawala","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8865-7896","authenticated-orcid":false,"given":"Dahua","family":"Lin","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0777-9232","authenticated-orcid":false,"given":"Bo","family":"Dai","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,10,2]]},"reference":[{"key":"19_CR1","doi-asserted-by":"crossref","unstructured":"Bain, M., Nagrani, A., Varol, G., Zisserman, A.: Frozen in time: a joint video and image encoder for end-to-end retrieval. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1728\u20131738 (2021)","DOI":"10.1109\/ICCV48922.2021.00175"},{"key":"19_CR2","unstructured":"Balaji, Y., et\u00a0al.: eDiffi: text-to-image diffusion models with an ensemble of expert denoisers. arXiv preprint arXiv:2211.01324 (2022)"},{"key":"19_CR3","unstructured":"Bar-Tal, O., et\u00a0al.: Lumiere: a space-time diffusion model for video generation. arXiv preprint arXiv:2401.12945 (2024)"},{"key":"19_CR4","unstructured":"Blattmann, A., et\u00a0al.: Stable video diffusion: scaling latent video diffusion models to large datasets. arXiv preprint arXiv:2311.15127 (2023)"},{"key":"19_CR5","doi-asserted-by":"crossref","unstructured":"Blattmann, A., et al.: Align your latents: high-resolution video synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22563\u201322575 (2023)","DOI":"10.1109\/CVPR52729.2023.02161"},{"key":"19_CR6","unstructured":"Bradcatt: Toonyou (2023). https:\/\/civitai.com\/models\/30240\/toonyou"},{"key":"19_CR7","unstructured":"Brooks, T., et al.: Video generation models as world simulators (2024). https:\/\/openai.com\/research\/video-generation-models-as-world-simulators"},{"key":"19_CR8","unstructured":"Chen, H., et al.: Videocrafter1: open diffusion models for high-quality video generation (2023)"},{"key":"19_CR9","doi-asserted-by":"crossref","unstructured":"Chen, T.S., et al.: Panda-70m: Captioning 70m videos with multiple cross-modality teachers (2024)","DOI":"10.1109\/CVPR52733.2024.01265"},{"key":"19_CR10","unstructured":"Chen, W., et al.: Control-a-video: controllable text-to-video generation with diffusion models. arXiv preprint arXiv:2305.13840 (2023)"},{"key":"19_CR11","unstructured":"Chen, X., et al.: Seine: short-to-long video diffusion model for generative transition and prediction (2023)"},{"key":"19_CR12","unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat GANs on image synthesis. In: Advance in Neural Information Processing System, vol. 34, pp. 8780\u20138794 (2021)"},{"key":"19_CR13","doi-asserted-by":"crossref","unstructured":"Ding, S., et al.: Motion-aware contrastive video representation learning via foreground-background merging. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9716\u20139726 (2022)","DOI":"10.1109\/CVPR52688.2022.00949"},{"key":"19_CR14","unstructured":"Ding, S., et al.: Motion-inductive self-supervised object discovery in videos. arXiv preprint arXiv:2210.00221 (2022)"},{"key":"19_CR15","doi-asserted-by":"crossref","unstructured":"Esser, P., Chiu, J., Atighehchian, P., Granskog, J., Germanidis, A.: Structure and content-guided video synthesis with diffusion models. arXiv preprint arXiv:2302.03011 (2023)","DOI":"10.1109\/ICCV51070.2023.00675"},{"key":"19_CR16","unstructured":"Gal, R., et al.: An image is worth one word: Personalizing text-to-image generation using textual inversion. arXiv preprint arXiv:2208.01618 (2022)"},{"issue":"4","key":"19_CR17","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3592133","volume":"42","author":"R Gal","year":"2023","unstructured":"Gal, R., Arar, M., Atzmon, Y., Bermano, A.H., Chechik, G., Cohen-Or, D.: Encoder-based domain tuning for fast personalization of text-to-image models. ACM Trans. Graph. (TOG) 42(4), 1\u201313 (2023)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"19_CR18","doi-asserted-by":"crossref","unstructured":"Ge, S., et al.: Preserve your own correlation: a noise prior for video diffusion models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 22930\u201322941 (2023)","DOI":"10.1109\/ICCV51070.2023.02096"},{"key":"19_CR19","doi-asserted-by":"crossref","unstructured":"Gu, S., et al.: Vector quantized diffusion model for text-to-image synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10696\u201310706 (2022)","DOI":"10.1109\/CVPR52688.2022.01043"},{"key":"19_CR20","unstructured":"Guo, Y., et al.: AnimateDiff: animate your personalized text-to-image diffusion models without specific tuning. arXiv preprint arXiv:2307.04725 (2023)"},{"key":"19_CR21","unstructured":"Gupta, A., et al.: Photorealistic video generation with diffusion models. arXiv preprint arXiv:2312.06662 (2023)"},{"key":"19_CR22","unstructured":"He, Y., Yang, T., Zhang, Y., Shan, Y., Chen, Q.: Latent video diffusion models for high-fidelity video generation with arbitrary lengths. arXiv preprint arXiv:2211.13221 (2022)"},{"key":"19_CR23","unstructured":"Ho, J., et\u00a0al.: Imagen video: high definition video generation with diffusion models. arXiv preprint arXiv:2210.02303 (2022)"},{"key":"19_CR24","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. In: Advance in Neural Information Processing System, vol. 33, pp. 6840\u20136851 (2020)"},{"key":"19_CR25","unstructured":"Ho, J., Salimans, T., Gritsenko, A., Chan, W., Norouzi, M., Fleet, D.J.: Video diffusion models. arXiv preprint arXiv:2204.03458 (2022)"},{"key":"19_CR26","unstructured":"Hong, W., Ding, M., Zheng, W., Liu, X., Tang, J.: Cogvideo: large-scale pretraining for text-to-video generation via transformers. arXiv preprint arXiv:2205.15868 (2022)"},{"key":"19_CR27","unstructured":"Hu, E.J., et al.: Lora: low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)"},{"key":"19_CR28","unstructured":"Hu, L., Gao, X., Zhang, P., Sun, K., Zhang, B., Bo, L.: Animate anyone: consistent and controllable image-to-video synthesis for character animation. arXiv preprint arXiv:2311.17117 (2023)"},{"key":"19_CR29","doi-asserted-by":"crossref","unstructured":"Karras, J., Holynski, A., Wang, T.C., Kemelmacher-Shlizerman, I.: Dreampose: fashion video synthesis with stable diffusion. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 22680\u201322690 (2023)","DOI":"10.1109\/ICCV51070.2023.02073"},{"key":"19_CR30","doi-asserted-by":"crossref","unstructured":"Khachatryan, L., et al.: Text2video-zero: text-to-image diffusion models are zero-shot video generators. In: IEEE International Conference on Computer Vision (ICCV) (2023)","DOI":"10.1109\/ICCV51070.2023.01462"},{"key":"19_CR31","unstructured":"Kondratyuk, D., et\u00a0al.: Videopoet: a large language model for zero-shot video generation. arXiv preprint arXiv:2312.14125 (2023)"},{"key":"19_CR32","doi-asserted-by":"crossref","unstructured":"Kumari, N., Zhang, B., Zhang, R., Shechtman, E., Zhu, J.Y.: Multi-concept customization of text-to-image diffusion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1931\u20131941 (2023)","DOI":"10.1109\/CVPR52729.2023.00192"},{"key":"19_CR33","unstructured":"Ma, X., et al.: Latte: latent diffusion transformer for video generation. arXiv preprint arXiv:2401.03048 (2024)"},{"key":"19_CR34","doi-asserted-by":"crossref","unstructured":"Ma, Y., et al.: Follow your pose: pose-guided text-to-video generation using pose-free videos. arXiv preprint arXiv:2304.01186 (2023)","DOI":"10.1609\/aaai.v38i5.28206"},{"key":"19_CR35","doi-asserted-by":"crossref","unstructured":"Menapace, W., et\u00a0al.: Snap video: scaled spatiotemporal transformers for text-to-video synthesis. arXiv preprint arXiv:2402.14797 (2024)","DOI":"10.1109\/CVPR52733.2024.00672"},{"issue":"1","key":"19_CR36","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1145\/3503250","volume":"65","author":"B Mildenhall","year":"2021","unstructured":"Mildenhall, B., Srinivasan, P.P., Tancik, M., Barron, J.T., Ramamoorthi, R., Ng, R.: NeRF: representing scenes as neural radiance fields for view synthesis. Commun. ACM 65(1), 99\u2013106 (2021)","journal-title":"Commun. ACM"},{"key":"19_CR37","doi-asserted-by":"crossref","unstructured":"Mou, C., et al.: T2i-adapter: learning adapters to dig out more controllable ability for text-to-image diffusion models. arXiv preprint arXiv:2302.08453 (2023)","DOI":"10.1609\/aaai.v38i5.28226"},{"key":"19_CR38","unstructured":"Podell, D., et al.: SDXL: improving latent diffusion models for high-resolution image synthesis. arXiv preprint arXiv:2307.01952 (2023)"},{"key":"19_CR39","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"issue":"3","key":"19_CR40","doi-asserted-by":"publisher","first-page":"1623","DOI":"10.1109\/TPAMI.2020.3019967","volume":"44","author":"R Ranftl","year":"2020","unstructured":"Ranftl, R., Lasinger, K., Hafner, D., Schindler, K., Koltun, V.: Towards robust monocular depth estimation: mixing datasets for zero-shot cross-dataset transfer. IEEE Trans. Pattern Anal. Mach. Intell. 44(3), 1623\u20131637 (2020)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"19_CR41","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"19_CR42","doi-asserted-by":"crossref","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-net: convolutional networks for biomedical image segmentation (2015)","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"19_CR43","doi-asserted-by":"crossref","unstructured":"Ruan, L., et al.: Mm-diffusion: Learning multi-modal diffusion models for joint audio and video generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10219\u201310228 (2023)","DOI":"10.1109\/CVPR52729.2023.00985"},{"key":"19_CR44","doi-asserted-by":"crossref","unstructured":"Ruiz, N., Li, Y., Jampani, V., Pritch, Y., Rubinstein, M., Aberman, K.: Dreambooth: fine tuning text-to-image diffusion models for subject-driven generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22500\u201322510 (2023)","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"19_CR45","doi-asserted-by":"crossref","unstructured":"Ruiz, N., et al.: Hyperdreambooth: hypernetworks for fast personalization of text-to-image models. arXiv preprint arXiv:2307.06949 (2023)","DOI":"10.1109\/CVPR52733.2024.00624"},{"key":"19_CR46","unstructured":"runwayml: Stable diffusion v1.5 (2022). https:\/\/huggingface.co\/runwayml\/stable-diffusion-v1-5"},{"key":"19_CR47","unstructured":"Saharia, C., et al.: Photorealistic text-to-image diffusion models with deep language understanding. In: Advance in Neural Information Processing System, vol. 35, pp. 36479\u201336494 (2022)"},{"key":"19_CR48","unstructured":"SG_161222: Realistic vision v5.1 (2023). https:\/\/civitai.com\/models\/4201\/realistic-vision-v51"},{"key":"19_CR49","unstructured":"Singer, U., et\u00a0al.: Make-a-video: text-to-video generation without text-video data. arXiv preprint arXiv:2209.14792 (2022)"},{"key":"19_CR50","unstructured":"Sohl-Dickstein, J., Weiss, E., Maheswaranathan, N., Ganguli, S.: Deep unsupervised learning using nonequilibrium thermodynamics. In: International Conference on Machine Learning, pp. 2256\u20132265. PMLR (2015)"},{"key":"19_CR51","unstructured":"Song, J., Meng, C., Ermon, S.: Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502 (2020)"},{"key":"19_CR52","doi-asserted-by":"crossref","unstructured":"Tian, L., Wang, Q., Zhang, B., Bo, L.: EMO: emote portrait alive-generating expressive portrait videos with audio2video diffusion model under weak conditions. arXiv preprint arXiv:2402.17485 (2024)","DOI":"10.1007\/978-3-031-73010-8_15"},{"key":"19_CR53","unstructured":"Tong, Z., Song, Y., Wang, J., Wang, L.: Videomae: Masked autoencoders are data-efficient learners for self-supervised video pre-training. In: Advance in Neural Information Processing System, vol. 35, pp. 10078\u201310093 (2022)"},{"key":"19_CR54","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"19_CR55","doi-asserted-by":"crossref","unstructured":"Vinker, Y., Alaluf, Y., Cohen-Or, D., Shamir, A.: CLIPascene: scene sketching with different types and levels of abstraction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4146\u20134156 (2023)","DOI":"10.1109\/ICCV51070.2023.00383"},{"key":"19_CR56","doi-asserted-by":"crossref","unstructured":"Voynov, A., Aberman, K., Cohen-Or, D.: Sketch-guided text-to-image diffusion models. In: ACM SIGGRAPH 2023 Conference Proceedings, pp. 1\u201311 (2023)","DOI":"10.1145\/3588432.3591560"},{"key":"19_CR57","unstructured":"Wang, J., et al.: Boximator: generating rich and controllable motions for video synthesis. arXiv preprint arXiv:2402.01566 (2024)"},{"key":"19_CR58","unstructured":"Wang, W., et\u00a0al.: Magicvideo-v2: multi-stage high-aesthetic video generation. arXiv preprint arXiv:2401.04468 (2024)"},{"key":"19_CR59","unstructured":"Wang, X., et al.: VideoComposer: compositional video synthesis with motion controllability. arXiv preprint arXiv:2306.02018 (2023)"},{"key":"19_CR60","unstructured":"Wang, Y., et al.: LAVIE: high-quality video generation with cascaded latent diffusion models (2023)"},{"key":"19_CR61","doi-asserted-by":"crossref","unstructured":"Wang, Z., et al.: MotionCtrl: a unified and flexible motion controller for video generation. arXiv preprint arXiv:2312.03641 (2023)","DOI":"10.1145\/3641519.3657518"},{"key":"19_CR62","doi-asserted-by":"crossref","unstructured":"Wu, J.Z., et al.: Tune-a-video: one-shot tuning of image diffusion models for text-to-video generation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7623\u20137633 (2023)","DOI":"10.1109\/ICCV51070.2023.00701"},{"key":"19_CR63","doi-asserted-by":"crossref","unstructured":"Wu, Q., et al.: Harnessing the spatial-temporal attention of diffusion models for high-fidelity text-to-image synthesis. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7766\u20137776 (2023)","DOI":"10.1109\/ICCV51070.2023.00714"},{"key":"19_CR64","doi-asserted-by":"crossref","unstructured":"Xing, J., et al.: Dynamicrafter: animating open-domain images with video diffusion priors. arXiv preprint arXiv:2310.12190 (2023)","DOI":"10.1007\/978-3-031-72952-2_23"},{"key":"19_CR65","doi-asserted-by":"crossref","unstructured":"Xu, X., Guo, J., Wang, Z., Huang, G., Essa, I., Shi, H.: Prompt-free diffusion: taking \u201c text\u201d out of text-to-image diffusion models. arXiv preprint arXiv:2305.16223 (2023)","DOI":"10.1109\/CVPR52733.2024.00829"},{"key":"19_CR66","doi-asserted-by":"crossref","unstructured":"Xu, Z., et al.: MagicAnimate: temporally consistent human image animation using diffusion model. arXiv preprint arXiv:2311.16498 (2023)","DOI":"10.1109\/CVPR52733.2024.00147"},{"key":"19_CR67","unstructured":"Ye, H., Zhang, J., Liu, S., Han, X., Yang, W.: IP-adapter: Text compatible image prompt adapter for text-to-image diffusion models. arXiv preprint arXiv:2308.06721 (2023)"},{"key":"19_CR68","unstructured":"Yin, S., et al.: DragNUWA: fine-grained control in video generation by integrating text, image, and trajectory. arXiv preprint arXiv:2308.08089 (2023)"},{"key":"19_CR69","doi-asserted-by":"crossref","unstructured":"Yu, L., et\u00a0al.: MAGVIT: masked generative video transformer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10459\u201310469 (2023)","DOI":"10.1109\/CVPR52729.2023.01008"},{"key":"19_CR70","doi-asserted-by":"crossref","unstructured":"Zeng, Y., et al.: Make pixels dance: high-dynamic video generation. arXiv preprint arXiv:2311.10982 (2023)","DOI":"10.1109\/CVPR52733.2024.00845"},{"key":"19_CR71","doi-asserted-by":"crossref","unstructured":"Zhang, D.J., et al.: Show-1: marrying pixel and latent diffusion models for text-to-video generation. arXiv preprint arXiv:2309.15818 (2023)","DOI":"10.1007\/s11263-024-02271-9"},{"key":"19_CR72","doi-asserted-by":"crossref","unstructured":"Zhang, L., Rao, A., Agrawala, M.: Adding conditional control to text-to-image diffusion models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3836\u20133847 (2023)","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"19_CR73","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A.A., Shechtman, E., Wang, O.: The unreasonable effectiveness of deep features as a perceptual metric. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 586\u2013595 (2018)","DOI":"10.1109\/CVPR.2018.00068"},{"key":"19_CR74","unstructured":"Zhang, S., et al.: I2vgen-xl: high-quality image-to-video synthesis via cascaded diffusion models. arXiv preprint arXiv:2311.04145 (2023)"},{"key":"19_CR75","unstructured":"Zhang, Y., Wei, Y., Jiang, D., Zhang, X., Zuo, W., Tian, Q.: Controlvideo: training-free controllable text-to-video generation. arXiv preprint arXiv:2305.13077 (2023)"},{"key":"19_CR76","unstructured":"Zhao, R., et al.: Motiondirector: motion customization of text-to-video diffusion models. arXiv preprint arXiv:2310.08465 (2023)"},{"key":"19_CR77","unstructured":"Zhao, S., et al.: Uni-controlnet: all-in-one control to text-to-image diffusion models. arXiv preprint arXiv:2305.16322 (2023)"},{"key":"19_CR78","unstructured":"Zhou, D., Wang, W., Yan, H., Lv, W., Zhu, Y., Feng, J.: Magicvideo: efficient video generation with latent diffusion models. arXiv preprint arXiv:2211.11018 (2022)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72946-1_19","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T23:36:54Z","timestamp":1732837014000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72946-1_19"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,2]]},"ISBN":["9783031729454","9783031729461"],"references-count":78,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72946-1_19","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10,2]]},"assertion":[{"value":"2 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}