{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:15:40Z","timestamp":1765340140991,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":50,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62171282"],"award-info":[{"award-number":["62171282"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Shanghai Municipal Science and Technology Major Project","award":["2021SHZDZX0102"],"award-info":[{"award-number":["2021SHZDZX0102"]}]},{"DOI":"10.13039\/501100003399","name":"Science and Technology Commission of Shanghai Municipality","doi-asserted-by":"publisher","award":["22DZ2229005"],"award-info":[{"award-number":["22DZ2229005"]}],"id":[{"id":"10.13039\/501100003399","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3754543","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T07:38:54Z","timestamp":1761377934000},"page":"9247-9256","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Position-LoRA: Enhanced Relation Customization through Structural Prior in Initial Latent Noise"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-7862-840X","authenticated-orcid":false,"given":"Yiming","family":"Li","sequence":"first","affiliation":[{"name":"MoE Key Lab of Artificial Intelligence, Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0674-9296","authenticated-orcid":false,"given":"Peng","family":"Zhou","sequence":"additional","affiliation":[{"name":"Nanjing University of Aeronautics and Astronautics, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7635-9101","authenticated-orcid":false,"given":"Xiaokang","family":"Qin","sequence":"additional","affiliation":[{"name":"Ant Group, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-7662-5850","authenticated-orcid":false,"given":"Hongwei","family":"Hu","sequence":"additional","affiliation":[{"name":"Ant Group, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-1956-694X","authenticated-orcid":false,"given":"Jun","family":"Sun","sequence":"additional","affiliation":[{"name":"Shanghai Key Lab of Digital Media Processing and Transmission, Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6508-4469","authenticated-orcid":false,"given":"Yi","family":"Xu","sequence":"additional","affiliation":[{"name":"MoE Key Lab of Artificial Intelligence, Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3680528.3687604"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3592450"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01767"},{"key":"e_1_3_2_1_4_1","volume-title":"Multidiffusion: Fusing diffusion paths for controlled image generation.","author":"Bar-Tal Omer","year":"2023","unstructured":"Omer Bar-Tal, Lior Yariv, Yaron Lipman, and Tali Dekel. 2023. Multidiffusion: Fusing diffusion paths for controlled image generation. (2023)."},{"key":"e_1_3_2_1_5_1","volume-title":"Training-free layout control with cross-attention guidance. arXiv preprint arXiv:2304.03373","author":"Chen Minghao","year":"2023","unstructured":"Minghao Chen, Iro Laina, and Andrea Vedaldi. 2023. Training-free layout control with cross-attention guidance. arXiv preprint arXiv:2304.03373 (2023)."},{"key":"e_1_3_2_1_6_1","first-page":"16222","article-title":"Diffusion self-guidance for controllable image generation","volume":"36","author":"Epstein Dave","year":"2023","unstructured":"Dave Epstein, Allan Jabri, Ben Poole, Alexei Efros, and Aleksander Holynski. 2023. Diffusion self-guidance for controllable image generation. Advances in Neural Information Processing Systems, Vol. 36 (2023), 16222-16239.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_7_1","volume-title":"European Conference on Computer Vision. Springer, 181-198","author":"Frenkel Yarden","year":"2024","unstructured":"Yarden Frenkel, Yael Vinker, Ariel Shamir, and Daniel Cohen-Or. 2024. Implicit style-content separation using b-lora. In European Conference on Computer Vision. Springer, 181-198."},{"volume-title":"The Eleventh International Conference on Learning Representations.","author":"Gal Rinon","key":"e_1_3_2_1_8_1","unstructured":"Rinon Gal, Yuval Alaluf, Yuval Atzmon, Or Patashnik, Amit Haim Bermano, Gal Chechik, and Daniel Cohen-or. [n.d.]. An Image is Worth One Word: Personalizing Text-to-Image Generation using Textual Inversion. In The Eleventh International Conference on Learning Representations."},{"key":"e_1_3_2_1_9_1","volume-title":"Renoise: Real image inversion through iterative noising","author":"Garibi Daniel","year":"2024","unstructured":"Daniel Garibi, Or Patashnik, Andrey Voynov, Hadar Averbuch-Elor, and Daniel Cohen-Or. [n.d.]. Renoise: Real image inversion through iterative noising, 2024. URL https:\/\/arxiv. org\/abs\/2403.14602 ( [n.,d.])."},{"key":"e_1_3_2_1_10_1","volume-title":"Prompt-to-prompt image editing with cross attention control. arXiv preprint arXiv:2208.01626","author":"Hertz Amir","year":"2022","unstructured":"Amir Hertz, Ron Mokady, Jay Tenenbaum, Kfir Aberman, Yael Pritch, and Daniel Cohen-Or. 2022. Prompt-to-prompt image editing with cross attention control. arXiv preprint arXiv:2208.01626 (2022)."},{"key":"e_1_3_2_1_11_1","volume-title":"Denoising diffusion probabilistic models. Advances in neural information processing systems","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. Advances in neural information processing systems, Vol. 33 (2020), 6840-6851."},{"key":"e_1_3_2_1_12_1","volume-title":"Classifier-free diffusion guidance. arXiv preprint arXiv:2207.12598","author":"Ho Jonathan","year":"2022","unstructured":"Jonathan Ho and Tim Salimans. 2022. Classifier-free diffusion guidance. arXiv preprint arXiv:2207.12598 (2022)."},{"volume-title":"LoRA: Low-Rank Adaptation of Large Language Models. In International Conference on Learning Representations.","author":"Hu Edward J","key":"e_1_3_2_1_13_1","unstructured":"Edward J Hu, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, Weizhu Chen, et al., [n.d.]. LoRA: Low-Rank Adaptation of Large Language Models. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_14_1","volume-title":"Kelvin CK Chan, and Ziwei Liu","author":"Huang Ziqi","year":"2023","unstructured":"Ziqi Huang, Tianxing Wu, Yuming Jiang, Kelvin CK Chan, and Ziwei Liu. 2023. ReVersion: Diffusion-based relation inversion from images. arXiv preprint arXiv:2303.13495 (2023)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00246"},{"key":"e_1_3_2_1_16_1","first-page":"73232","article-title":"Collaborative score distillation for consistent visual editing","volume":"36","author":"Kim Subin","year":"2023","unstructured":"Subin Kim, Kyungmin Lee, June Suk Choi, Jongheon Jeong, Kihyuk Sohn, and Jinwoo Shin. 2023a. Collaborative score distillation for consistent visual editing. Advances in Neural Information Processing Systems, Vol. 36 (2023), 73232-73257.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00708"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00192"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02156"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i4.28111"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV56688.2023.00037"},{"key":"e_1_3_2_1_22_1","volume-title":"Dpm-solver: A fast ode solver for diffusion probabilistic model sampling in around 10 steps. Advances in neural information processing systems","author":"Lu Cheng","year":"2022","unstructured":"Cheng Lu, Yuhao Zhou, Fan Bao, Jianfei Chen, Chongxuan Li, and Jun Zhu. 2022. Dpm-solver: A fast ode solver for diffusion probabilistic model sampling in around 10 steps. Advances in neural information processing systems, Vol. 35 (2022), 5775-5787."},{"key":"e_1_3_2_1_23_1","volume-title":"Dpm-solver: Fast solver for guided sampling of diffusion probabilistic models. Machine Intelligence Research","author":"Lu Cheng","year":"2025","unstructured":"Cheng Lu, Yuhao Zhou, Fan Bao, Jianfei Chen, Chongxuan Li, and Jun Zhu. 2025. Dpm-solver: Fast solver for guided sampling of diffusion probabilistic models. Machine Intelligence Research (2025), 1-22."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612191"},{"key":"e_1_3_2_1_25_1","volume-title":"European Conference on Computer Vision. Springer, 93-109","author":"Mao Jiafeng","year":"2024","unstructured":"Jiafeng Mao, Xueting Wang, and Kiyoharu Aizawa. 2024. The lottery ticket hypothesis in denoising: Towards semantic-driven initialization. In European Conference on Computer Vision. Springer, 93-109."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00585"},{"key":"e_1_3_2_1_27_1","volume-title":"Glide: Towards photorealistic image generation and editing with text-guided diffusion models. arXiv preprint arXiv:2112.10741","author":"Nichol Alex","year":"2021","unstructured":"Alex Nichol, Prafulla Dhariwal, Aditya Ramesh, Pranav Shyam, Pamela Mishkin, Bob McGrew, Ilya Sutskever, and Mark Chen. 2021. Glide: Towards photorealistic image generation and editing with text-guided diffusion models. arXiv preprint arXiv:2112.10741 (2021)."},{"key":"e_1_3_2_1_28_1","volume-title":"International Conference on Machine Learning. PMLR, 8162-8171","author":"Nichol Alexander Quinn","year":"2021","unstructured":"Alexander Quinn Nichol and Prafulla Dhariwal. 2021. Improved denoising diffusion probabilistic models. In International Conference on Machine Learning. PMLR, 8162-8171."},{"key":"e_1_3_2_1_29_1","volume-title":"K-lora: Unlocking training-free fusion of any subject and style loras. arXiv preprint arXiv:2502.18461","author":"Ouyang Ziheng","year":"2025","unstructured":"Ziheng Ouyang, Zhen Li, and Qibin Hou. 2025. K-lora: Unlocking training-free fusion of any subject and style loras. arXiv preprint arXiv:2502.18461 (2025)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01458"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00761"},{"key":"e_1_3_2_1_32_1","volume-title":"Not all noises are created equally: Diffusion noise selection and optimization. arXiv preprint arXiv:2407.14041","author":"Qi Zipeng","year":"2024","unstructured":"Zipeng Qi, Lichen Bai, Haoyi Xiong, and Zeke Xie. 2024. Not all noises are created equally: Diffusion noise selection and optimization. arXiv preprint arXiv:2407.14041 (2024)."},{"key":"e_1_3_2_1_33_1","volume-title":"International conference on machine learning. PMLR, 8748-8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al., 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748-8763."},{"key":"e_1_3_2_1_34_1","volume-title":"Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125","author":"Ramesh Aditya","year":"2022","unstructured":"Aditya Ramesh, Prafulla Dhariwal, Alex Nichol, Casey Chu, and Mark Chen. 2022. Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125 (2022)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"e_1_3_2_1_37_1","volume-title":"European Conference on Computer Vision. Springer, 422-438","author":"Shah Viraj","year":"2024","unstructured":"Viraj Shah, Nataniel Ruiz, Forrester Cole, Erika Lu, Svetlana Lazebnik, Yuanzhen Li, and Varun Jampani. 2024. Ziplora: Any subject in any style by effectively merging loras. In European Conference on Computer Vision. Springer, 422-438."},{"key":"e_1_3_2_1_38_1","volume-title":"Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502","author":"Song Jiaming","year":"2020","unstructured":"Jiaming Song, Chenlin Meng, and Stefano Ermon. 2020. Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502 (2020)."},{"key":"e_1_3_2_1_39_1","volume-title":"Prompt Sliders for Fine-Grained Control, Editing and Erasing of Concepts in Diffusion Models. arXiv preprint arXiv:2409.16535","author":"Sridhar Deepak","year":"2024","unstructured":"Deepak Sridhar and Nuno Vasconcelos. 2024. Prompt Sliders for Fine-Grained Control, Editing and Erasing of Concepts in Diffusion Models. arXiv preprint arXiv:2409.16535 (2024)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3618315"},{"key":"e_1_3_2_1_41_1","volume-title":"MS-Diffusion: Multi-subject Zero-shot Image Personalization with Layout Guidance. In The Thirteenth International Conference on Learning Representations.","author":"Wang Xierui","year":"2024","unstructured":"Xierui Wang, Siming Fu, Qihan Huang, Wanggui He, and Hao Jiang. 2024. MS-Diffusion: Multi-subject Zero-shot Image Personalization with Layout Guidance. In The Thirteenth International Conference on Learning Representations."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681391"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01760"},{"key":"e_1_3_2_1_44_1","volume-title":"Single Trajectory Distillation for Accelerating Image and Video Style Transfer. arXiv preprint arXiv:2412.18945","author":"Xu Sijie","year":"2024","unstructured":"Sijie Xu, Runqi Wang, Wei Zhu, Dejia Song, Nemo Chen, Xu Tang, and Yao Hu. 2024. Single Trajectory Distillation for Accelerating Image and Video Style Transfer. arXiv preprint arXiv:2412.18945 (2024)."},{"key":"e_1_3_2_1_45_1","volume-title":"Ip-adapter: Text compatible image prompt adapter for text-to-image diffusion models. arXiv preprint arXiv:2308.06721","author":"Ye Hu","year":"2023","unstructured":"Hu Ye, Jun Zhang, Sibo Liu, Xiao Han, and Wei Yang. 2023. Ip-adapter: Text compatible image prompt adapter for text-to-image diffusion models. arXiv preprint arXiv:2308.06721 (2023)."},{"key":"e_1_3_2_1_46_1","volume-title":"Text-to-image diffusion model in generative ai: A survey. arXiv preprint arXiv:2303.07909","author":"Zhang Chenshuang","year":"2023","unstructured":"Chenshuang Zhang, Chaoning Zhang, Mengchun Zhang, and In So Kweon. 2023b. Text-to-image diffusion model in generative ai: A survey. arXiv preprint arXiv:2303.07909 (2023)."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3658150"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_2_1_49_1","volume-title":"Catversion: Concatenating embeddings for diffusion-based text-to-image personalization","author":"Zhao Ruoyu","year":"2025","unstructured":"Ruoyu Zhao, Mingrui Zhu, Shiyin Dong, De Cheng, Nannan Wang, and Xinbo Gao. 2025. Catversion: Concatenating embeddings for diffusion-based text-to-image personalization. IEEE Transactions on Circuits and Systems for Video Technology (2025)."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00651"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3754543","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:12:58Z","timestamp":1765339978000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3754543"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":50,"alternative-id":["10.1145\/3746027.3754543","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3754543","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}