{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T17:22:24Z","timestamp":1771953744295,"version":"3.50.1"},"publisher-location":"Cham","reference-count":78,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031928079","type":"print"},{"value":"9783031928086","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-92808-6_2","type":"book-chapter","created":{"date-parts":[[2025,5,27]],"date-time":"2025-05-27T15:59:38Z","timestamp":1748361578000},"page":"19-37","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":14,"title":["Magic-Me: Identity-Specific Video Customized Diffusion"],"prefix":"10.1007","author":[{"given":"Ze","family":"Ma","sequence":"first","affiliation":[]},{"given":"Daquan","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Xue-She","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Chun-Hsiao","family":"Yeh","sequence":"additional","affiliation":[]},{"given":"Xiuyu","family":"Li","sequence":"additional","affiliation":[]},{"given":"Huanrui","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Zhen","family":"Dong","sequence":"additional","affiliation":[]},{"given":"Kurt","family":"Keutzer","sequence":"additional","affiliation":[]},{"given":"Jiashi","family":"Feng","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,5,12]]},"reference":[{"key":"2_CR1","unstructured":"Stable diffusion (2022). https:\/\/huggingface.co\/runwayml\/stable-diffusion-v1-5"},{"key":"2_CR2","unstructured":"Rcnz cartoon 3d v1.0 (2023). https:\/\/civitai.com\/models\/66347?modelVersionId=71009"},{"key":"2_CR3","unstructured":"Realistic vision v5.1 (2023). https:\/\/civitai.com\/models\/4201\/realistic-vision-v51"},{"key":"2_CR4","unstructured":"Toonyou beta 3 (2023). https:\/\/civitai.com\/models\/30240?modelVersionId=78775"},{"key":"2_CR5","doi-asserted-by":"crossref","unstructured":"Alaluf, Y., Richardson, E., Metzer, G., Cohen-Or, D.: A neural space-time representation for text-to-image personalization. arXiv preprint arXiv:2305.15391 (2023)","DOI":"10.1145\/3618322"},{"key":"2_CR6","doi-asserted-by":"publisher","unstructured":"Avrahami, O., Aberman, K., Fried, O., Cohen-Or, D., Lischinski, D.: Break-a-scene: extracting multiple concepts from a single image. In: SIGGRAPH Asia 2023 Conference Papers, SA 2023. Association for Computing Machinery, New York (2023). https:\/\/doi.org\/10.1145\/3610548.3618154","DOI":"10.1145\/3610548.3618154"},{"key":"2_CR7","unstructured":"Bai, J., et al.: Integrating view conditions for image synthesis. arXiv preprint arXiv:2310.16002 (2023)"},{"key":"2_CR8","unstructured":"Betker, J., et al.: Improving image generation with better captions (2023). https:\/\/cdn.openai.com\/papers\/dall-e-3.pdf"},{"key":"2_CR9","doi-asserted-by":"crossref","unstructured":"Blattmann, A., et al.: Align your latents: high-resolution video synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22563\u201322575 (2023)","DOI":"10.1109\/CVPR52729.2023.02161"},{"key":"2_CR10","doi-asserted-by":"crossref","unstructured":"Caron, M., et al.: Emerging properties in self-supervised vision transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9650\u20139660 (2021)","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"2_CR11","unstructured":"Chen, H., et al.: Videodreamer: customized multi-subject text-to-video generation with disen-mix finetuning (2023)"},{"key":"2_CR12","unstructured":"Chen, H., Zhang, Y., Wang, X., Duan, X., Zhou, Y., Zhu, W.: Disenbooth: identity-preserving disentangled tuning for subject-driven text-to-image generation. arXiv preprint arXiv:2305.03374 (2023)"},{"key":"2_CR13","unstructured":"Civitai: Civitai (2022). https:\/\/civitai.com\/"},{"key":"2_CR14","doi-asserted-by":"crossref","unstructured":"Dorkenwald, M., Milbich, T., Blattmann, A., Rombach, R., Derpanis, K.G., Ommer, B.: Stochastic image-to-video synthesis using cinns. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3742\u20133753 (2021)","DOI":"10.1109\/CVPR46437.2021.00374"},{"key":"2_CR15","doi-asserted-by":"crossref","unstructured":"Esser, P., Chiu, J., Atighehchian, P., Granskog, J., Germanidis, A.: Structure and content-guided video synthesis with diffusion models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 7346\u20137356 (2023)","DOI":"10.1109\/ICCV51070.2023.00675"},{"key":"2_CR16","unstructured":"Gal, R., et al.: An image is worth one word: personalizing text-to-image generation using textual inversion. arXiv preprint arXiv:2208.01618 (2022)"},{"key":"2_CR17","unstructured":"Gu, Y., et\u00a0al.: Mix-of-show: decentralized low-rank adaptation for multi-concept customization of diffusion models. arXiv preprint arXiv:2305.18292 (2023)"},{"key":"2_CR18","unstructured":"Guo, Y., et al.: Animatediff: animate your personalized text-to-image diffusion models without specific tuning. arXiv preprint arXiv:2307.04725 (2023)"},{"key":"2_CR19","doi-asserted-by":"crossref","unstructured":"Ha, S., Kersner, M., Kim, B., Seo, S., Kim, D.: Marionette: few-shot face reenactment preserving identity of unseen targets. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a034, pp. 10893\u201310900 (2020)","DOI":"10.1609\/aaai.v34i07.6721"},{"key":"2_CR20","doi-asserted-by":"crossref","unstructured":"Han, L., Li, Y., Zhang, H., Milanfar, P., Metaxas, D., Yang, F.: Svdiff: compact parameter space for diffusion fine-tuning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 7323\u20137334 (2023)","DOI":"10.1109\/ICCV51070.2023.00673"},{"key":"2_CR21","doi-asserted-by":"crossref","unstructured":"Hessel, J., Holtzman, A., Forbes, M., Bras, R.L., Choi, Y.: Clipscore: a reference-free evaluation metric for image captioning. arXiv preprint arXiv:2104.08718 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.595"},{"key":"2_CR22","unstructured":"Ho, J., et\u00a0al.: Imagen video: high definition video generation with diffusion models. arXiv preprint arXiv:2210.02303 (2022)"},{"key":"2_CR23","unstructured":"Ho, J., Salimans, T., Gritsenko, A., Chan, W., Norouzi, M., Fleet, D.J.: Video diffusion models. arXiv:2204.03458 (2022)"},{"key":"2_CR24","unstructured":"Hong, W., Ding, M., Zheng, W., Liu, X., Tang, J.: Cogvideo: large-scale pretraining for text-to-video generation via transformers. arXiv preprint arXiv:2205.15868 (2022)"},{"key":"2_CR25","unstructured":"Hu, E.J., et al.: Lora: low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)"},{"key":"2_CR26","unstructured":"HuggingFace: Huggingface (2022). https:\/\/huggingface.co\/"},{"key":"2_CR27","doi-asserted-by":"crossref","unstructured":"Isola, P., Zhu, J.Y., Zhou, T., Efros, A.A.: Image-to-image translation with conditional adversarial networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1125\u20131134 (2017)","DOI":"10.1109\/CVPR.2017.632"},{"key":"2_CR28","doi-asserted-by":"crossref","unstructured":"Khachatryan, L., et al.: Text2video-zero: text-to-image diffusion models are zero-shot video generators. arXiv preprint arXiv:2303.13439 (2023)","DOI":"10.1109\/ICCV51070.2023.01462"},{"key":"2_CR29","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114 (2013)"},{"key":"2_CR30","doi-asserted-by":"crossref","unstructured":"Kirillov, A., et al.: Segment anything. arXiv:2304.02643 (2023)","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"2_CR31","doi-asserted-by":"crossref","unstructured":"Kumari, N., Zhang, B., Zhang, R., Shechtman, E., Zhu, J.Y.: Multi-concept customization of text-to-image diffusion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1931\u20131941 (2023)","DOI":"10.1109\/CVPR52729.2023.00192"},{"key":"2_CR32","doi-asserted-by":"crossref","unstructured":"Ledig, C., et\u00a0al.: Photo-realistic single image super-resolution using a generative adversarial network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4681\u20134690 (2017)","DOI":"10.1109\/CVPR.2017.19"},{"key":"2_CR33","doi-asserted-by":"crossref","unstructured":"Li, Y., Min, M., Shen, D., Carlson, D., Carin, L.: Video generation from text. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a032 (2018)","DOI":"10.1609\/aaai.v32i1.12233"},{"key":"2_CR34","doi-asserted-by":"crossref","unstructured":"Li, Z., Cao, M., Wang, X., Qi, Z., Cheng, M.M., Shan, Y.: Photomaker: customizing realistic human photos via stacked id embedding. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2024)","DOI":"10.1109\/CVPR52733.2024.00825"},{"key":"2_CR35","unstructured":"Liew, J.H., Yan, H., Zhang, J., Xu, Z., Feng, J.: Magicedit: high-fidelity and temporally coherent video editing. arXiv preprint arXiv:2308.14749 (2023)"},{"key":"2_CR36","doi-asserted-by":"crossref","unstructured":"Liu, S., et\u00a0al.: Grounding dino: marrying dino with grounded pre-training for open-set object detection. arXiv preprint arXiv:2303.05499 (2023)","DOI":"10.1007\/978-3-031-72970-6_3"},{"key":"2_CR37","unstructured":"Liu, Z., et al.: Cones 2: customizable image synthesis with multiple subjects. arXiv preprint arXiv:2305.19327 (2023)"},{"key":"2_CR38","doi-asserted-by":"crossref","unstructured":"Ma, J., Liang, J., Chen, C., Lu, H.: Subject-diffusion: open domain personalized text-to-image generation without test-time fine-tuning. arXiv preprint arXiv:2307.11410 (2023)","DOI":"10.1145\/3641519.3657469"},{"key":"2_CR39","doi-asserted-by":"crossref","unstructured":"Ni, H., Liu, Y., Huang, S.X., Xue, Y.: Cross-identity video motion retargeting with joint transformation and synthesis. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV), pp. 412\u2013422 (2023)","DOI":"10.1109\/WACV56688.2023.00049"},{"key":"2_CR40","doi-asserted-by":"crossref","unstructured":"Ni, H., Shi, C., Li, K., Huang, S.X., Min, M.R.: Conditional image-to-video generation with latent flow diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18444\u201318455 (2023)","DOI":"10.1109\/CVPR52729.2023.01769"},{"key":"2_CR41","unstructured":"Ning, M., Li, M., Su, J., Salah, A.A., Ertugrul, I.O.: Elucidating the exposure bias in diffusion models. arXiv preprint arXiv:2308.15321 (2023)"},{"key":"2_CR42","unstructured":"Ning, M., Sangineto, E., Porrello, A., Calderara, S., Cucchiara, R.: Input perturbation reduces exposure bias in diffusion models. arXiv preprint arXiv:2301.11706 (2023)"},{"key":"2_CR43","doi-asserted-by":"crossref","unstructured":"Pan, Y., Qiu, Z., Yao, T., Li, H., Mei, T.: To create what you tell: generating videos from captions. In: Proceedings of the 25th ACM International Conference on Multimedia, pp. 1789\u20131798 (2017)","DOI":"10.1145\/3123266.3127905"},{"key":"2_CR44","unstructured":"Podell, D., et al.: SDXL: improving latent diffusion models for high-resolution image synthesis. arXiv preprint arXiv:2307.01952 (2023)"},{"key":"2_CR45","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"2_CR46","unstructured":"Ranzato, M., Chopra, S., Auli, M., Zaremba, W.: Sequence level training with recurrent neural networks. arXiv preprint arXiv:1511.06732 (2015)"},{"key":"2_CR47","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"2_CR48","doi-asserted-by":"crossref","unstructured":"Ruiz, N., Li, Y., Jampani, V., Pritch, Y., Rubinstein, M., Aberman, K.: Dreambooth: fine tuning text-to-image diffusion models for subject-driven generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22500\u201322510 (2023)","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"2_CR49","unstructured":"Ryu, S.: Low-rank adaptation for fast text-to-image diffusion fine-tuning (2023)"},{"key":"2_CR50","first-page":"36479","volume":"35","author":"C Saharia","year":"2022","unstructured":"Saharia, C., et al.: Photorealistic text-to-image diffusion models with deep language understanding. Adv. Neural. Inf. Process. Syst. 35, 36479\u201336494 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2_CR51","doi-asserted-by":"publisher","unstructured":"Schmidt, F.: Generalization in generation: a closer look at exposure bias. In: Birch, A., et al. (eds.) Proceedings of the 3rd Workshop on Neural Generation and Translation, pp. 157\u2013167. Association for Computational Linguistics, Hong Kong (2019). https:\/\/doi.org\/10.18653\/v1\/D19-5616. https:\/\/aclanthology.org\/D19-5616","DOI":"10.18653\/v1\/D19-5616"},{"key":"2_CR52","doi-asserted-by":"crossref","unstructured":"Siarohin, A., Lathuili\u00e8re, S., Tulyakov, S., Ricci, E., Sebe, N.: Animating arbitrary objects via deep motion transfer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2377\u20132386 (2019)","DOI":"10.1109\/CVPR.2019.00248"},{"key":"2_CR53","unstructured":"Siarohin, A., Lathuili\u00e8re, S., Tulyakov, S., Ricci, E., Sebe, N.: First order motion model for image animation. In: Conference on Neural Information Processing Systems (NeurIPS) (2019)"},{"key":"2_CR54","doi-asserted-by":"crossref","unstructured":"Siarohin, A., Woodford, O., Ren, J., Chai, M., Tulyakov, S.: Motion representations for articulated animation. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.01344"},{"key":"2_CR55","unstructured":"Singer, U., et\u00a0al.: Make-a-video: text-to-video generation without text-video data. arXiv preprint arXiv:2209.14792 (2022)"},{"key":"2_CR56","doi-asserted-by":"crossref","unstructured":"Tao, J., et al.: Structure-aware motion transfer with deformable anchor model. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3637\u20133646 (2022)","DOI":"10.1109\/CVPR52688.2022.00362"},{"key":"2_CR57","unstructured":"Villegas, R., et al.: Phenaki: variable length video generation from open domain textual description. arXiv preprint arXiv:2210.02399 (2022)"},{"key":"2_CR58","unstructured":"Voynov, A., Chu, Q., Cohen-Or, D., Aberman, K.: P+: extended textual conditioning in text-to-image generation (2023)"},{"key":"2_CR59","unstructured":"Wang, Q., Bai, X., Wang, H., Qin, Z., Chen, A.: Instantid: zero-shot identity-preserving generation in seconds. arXiv preprint arXiv:2401.07519 (2024)"},{"key":"2_CR60","unstructured":"Wang, T.C., Liu, M.Y., Tao, A., Liu, G., Kautz, J., Catanzaro, B.: Few-shot video-to-video synthesis. In: Advances in Neural Information Processing Systems (NeurIPS) (2019)"},{"key":"2_CR61","unstructured":"Wang, T.C., et al.: Video-to-video synthesis. In: Advances in Neural Information Processing Systems (NeurIPS) (2018)"},{"key":"2_CR62","doi-asserted-by":"crossref","unstructured":"Wang, X., Xie, L., Dong, C., Shan, Y.: Real-esrgan: training real-world blind super-resolution with pure synthetic data. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1905\u20131914 (2021)","DOI":"10.1109\/ICCVW54120.2021.00217"},{"key":"2_CR63","unstructured":"Wang, Y., Yang, D., Bremond, F., Dantcheva, A.: Latent image animator: learning to animate images via latent space navigation. In: International Conference on Learning Representations (2022). https:\/\/openreview.net\/forum?id=7r6kDq0mK"},{"key":"2_CR64","unstructured":"Wang, Z., et al.: Customvideo: customizing text-to-video generation with multiple subjects. arXiv preprint arXiv:2401.09962 (2024)"},{"key":"2_CR65","doi-asserted-by":"crossref","unstructured":"Wiles, O., Koepke, A., Zisserman, A.: X2face: a network for controlling face generation using images, audio, and pose codes. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 670\u2013686 (2018)","DOI":"10.1007\/978-3-030-01261-8_41"},{"key":"2_CR66","doi-asserted-by":"crossref","unstructured":"Wu, J.Z., et al.: Tune-a-video: one-shot tuning of image diffusion models for text-to-video generation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7623\u20137633 (2023)","DOI":"10.1109\/ICCV51070.2023.00701"},{"key":"2_CR67","unstructured":"Wu, J.Z., et al.: CVPR 2023 text guided video editing competition. arXiv preprint arXiv: 2310.16003 (2023)"},{"key":"2_CR68","doi-asserted-by":"crossref","unstructured":"Xiao, G., Yin, T., Freeman, W.T., Durand, F., Han, S.: Fastcomposer: tuning-free multi-subject image generation with localized attention. arXiv preprint arXiv:2305.10431 (2023)","DOI":"10.1007\/s11263-024-02227-z"},{"key":"2_CR69","unstructured":"Xing, J., et\u00a0al.: Make-your-video: customized video generation using textual and structural guidance. arXiv preprint arXiv:2306.00943 (2023)"},{"key":"2_CR70","doi-asserted-by":"crossref","unstructured":"Xu, B., et al.: Move as you like: image animation in e-commerce scenario. In: Proceedings of the 29th ACM International Conference on Multimedia, pp. 2759\u20132761 (2021)","DOI":"10.1145\/3474085.3478550"},{"key":"2_CR71","unstructured":"Yan, W., Zhang, Y., Abbeel, P., Srinivas, A.: VideoGPT: video generation using VQ-VAE and transformers. arXiv preprint arXiv:2104.10157 (2021)"},{"key":"2_CR72","unstructured":"Ye, H., Zhang, J., Liu, S., Han, X., Yang, W.: IP-adapter: text compatible image prompt adapter for text-to-image diffusion models (2023)"},{"key":"2_CR73","doi-asserted-by":"crossref","unstructured":"Yu, J., Wang, Y., Zhao, C., Ghanem, B., Zhang, J.: Freedom: training-free energy-guided conditional diffusion model. arXiv preprint arXiv:2303.09833 (2023)","DOI":"10.1109\/ICCV51070.2023.02118"},{"key":"2_CR74","unstructured":"Zhang, L., Rao, A., Agrawala, M.: Adding conditional control to text-to-image diffusion models"},{"key":"2_CR75","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A.A., Shechtman, E., Wang, O.: The unreasonable effectiveness of deep features as a perceptual metric. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 586\u2013595 (2018)","DOI":"10.1109\/CVPR.2018.00068"},{"key":"2_CR76","doi-asserted-by":"crossref","unstructured":"Zhao, J., Zhang, H.: Thin-plate spline motion model for image animation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3657\u20133666 (2022)","DOI":"10.1109\/CVPR52688.2022.00364"},{"key":"2_CR77","unstructured":"Zhou, D., Wang, W., Yan, H., Lv, W., Zhu, Y., Feng, J.: Magicvideo: efficient video generation with latent diffusion models. arXiv preprint arXiv:2211.11018 (2022)"},{"key":"2_CR78","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Zhou, D., Zhu, Z.L., Wang, Y., Hou, Q., Feng, J.: Maskdiffusion: boosting text-to-image consistency with conditional mask. arXiv preprint arXiv:2309.04399 (2023)","DOI":"10.1007\/s11263-024-02294-2"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-92808-6_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,27]],"date-time":"2025-05-27T15:59:59Z","timestamp":1748361599000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-92808-6_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031928079","9783031928086"],"references-count":78,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-92808-6_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"12 May 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}