{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,2]],"date-time":"2025-11-02T19:09:21Z","timestamp":1762110561436,"version":"build-2065373602"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Natural Science Foundation of China","award":["62332010"],"award-info":[{"award-number":["62332010"]}]},{"name":"Key Laboratory of Science, Technology and Standard in Press Industry"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681380","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:49Z","timestamp":1729925989000},"page":"4101-4109","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["FBSDiff: Plug-and-Play Frequency Band Substitution of Diffusion Features for Highly Controllable Text-Driven Image Translation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3618-043X","authenticated-orcid":false,"given":"Xiang","family":"Gao","sequence":"first","affiliation":[{"name":"Wangxuan Institute of Computer Technology, Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0468-9576","authenticated-orcid":false,"given":"Jiaying","family":"Liu","sequence":"additional","affiliation":[{"name":"Wangxuan Institute of Computer Technology, Peking University, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"doi-asserted-by":"publisher","key":"e_1_3_2_1_1_1","DOI":"10.1109\/CVPR52729.2023.01213"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_2_1","DOI":"10.1109\/CVPR52729.2023.01764"},{"key":"e_1_3_2_1_3_1","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown Tom","year":"2020","unstructured":"Tom Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared D Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, et al. 2020. Language models are few-shot learners. Proceedings of the Advances in Neural Information Processing Systems 33 (2020), 1877--1901.","journal-title":"Proceedings of the Advances in Neural Information Processing Systems"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_4_1","DOI":"10.1109\/ICCV48922.2021.01367"},{"key":"e_1_3_2_1_5_1","first-page":"25683","article-title":"Improving diffusion models for inverse problems using manifold constraints","volume":"35","author":"Chung Hyungjin","year":"2022","unstructured":"Hyungjin Chung, Byeongsu Sim, Dohoon Ryu, and Jong Chul Ye. 2022. Improving diffusion models for inverse problems using manifold constraints. Proceedings of the Advances in Neural Information Processing Systems 35 (2022), 25683--25696.","journal-title":"Proceedings of the Advances in Neural Information Processing Systems"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_6_1","DOI":"10.1007\/978-3-031-19836-6_6"},{"key":"e_1_3_2_1_7_1","first-page":"8780","article-title":"Diffusion models beat gans on image synthesis","volume":"34","author":"Dhariwal Prafulla","year":"2021","unstructured":"Prafulla Dhariwal and Alexander Nichol. 2021. Diffusion models beat gans on image synthesis. Proceedings of the Advances in Neural Information Processing Systems 34 (2021), 8780--8794.","journal-title":"Proceedings of the Advances in Neural Information Processing Systems"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_8_1","DOI":"10.1109\/ICCV51070.2023.00683"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_9_1","DOI":"10.1109\/CVPR46437.2021.01268"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_10_1","DOI":"10.1109\/ICPR.2016.7900182"},{"key":"e_1_3_2_1_11_1","volume-title":"Proceedings of the International Conference on Learning Representations","author":"Hertz Amir","year":"2023","unstructured":"Amir Hertz, Ron Mokady, Jay Tenenbaum, Kfir Aberman, Yael Pritch, and Daniel Cohen-Or. 2023. Prompt-to-prompt image editing with cross attention control. Proceedings of the International Conference on Learning Representations (2023)."},{"key":"e_1_3_2_1_12_1","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume":"33","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. Proceedings of the Advances in Neural Information Processing Systems 33 (2020), 6840--6851.","journal-title":"Proceedings of the Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_13_1","volume-title":"Classifier-free diffusion guidance. arXiv preprint arXiv:2207.12598","author":"Ho Jonathan","year":"2022","unstructured":"Jonathan Ho and Tim Salimans. 2022. Classifier-free diffusion guidance. arXiv preprint arXiv:2207.12598 (2022)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_14_1","DOI":"10.1109\/ICCV.2017.167"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_15_1","DOI":"10.1109\/CVPR52729.2023.00582"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_16_1","DOI":"10.1109\/CVPR52688.2022.00246"},{"key":"e_1_3_2_1_17_1","volume-title":"Proceedings of the International Conference on Learning Representations.","author":"Kwon Gihyun","year":"2022","unstructured":"Gihyun Kwon and Jong Chul Ye. 2022. Diffusion-based Image Translation using disentangled style and content representation. In Proceedings of the International Conference on Learning Representations."},{"key":"e_1_3_2_1_18_1","volume-title":"Qibin Hou, Yaxing Wang, and Jian Yang.","author":"Li Senmao","year":"2023","unstructured":"Senmao Li, Joost van de Weijer, Taihang Hu, Fahad Shahbaz Khan, Qibin Hou, Yaxing Wang, and Jian Yang. 2023. Stylediffusion: Prompt-embedding inversion for text-based editing. arXiv preprint arXiv:2303.15649 (2023)."},{"key":"e_1_3_2_1_19_1","volume-title":"Control Color: Multimodal Diffusion-based Interactive Image Colorization. arXiv preprint arXiv:2402.10855","author":"Liang Zhexin","year":"2024","unstructured":"Zhexin Liang, Zhaochen Li, Shangchen Zhou, Chongyi Li, and Chen Change Loy. 2024. Control Color: Multimodal Diffusion-based Interactive Image Colorization. arXiv preprint arXiv:2402.10855 (2024)."},{"key":"e_1_3_2_1_20_1","first-page":"5775","article-title":"Dpm-solver: A fast ode solver for diffusion probabilistic model sampling in around 10 steps","volume":"35","author":"Lu Cheng","year":"2022","unstructured":"Cheng Lu, Yuhao Zhou, Fan Bao, Jianfei Chen, Chongxuan Li, and Jun Zhu. 2022. Dpm-solver: A fast ode solver for diffusion probabilistic model sampling in around 10 steps. Proceedings of the Advances in Neural Information Processing Systems 35 (2022), 5775--5787.","journal-title":"Proceedings of the Advances in Neural Information Processing Systems"},{"volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 11461--11471","author":"Lugmayr Andreas","unstructured":"Andreas Lugmayr, Martin Danelljan, Andres Romero, Fisher Yu, Radu Timofte, and L Repaint Van Gool. [n. d.]. Inpainting using denoising diffusion probabilistic models. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 11461--11471.","key":"e_1_3_2_1_21_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_22_1","DOI":"10.1109\/CVPR52729.2023.00585"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_23_1","DOI":"10.1609\/aaai.v38i5.28226"},{"key":"e_1_3_2_1_24_1","volume-title":"Proceedings of the International Conference on Machine Learning. PMLR, 16784--16804","author":"Nichol Alexander Quinn","year":"2022","unstructured":"Alexander Quinn Nichol, Prafulla Dhariwal, Aditya Ramesh, Pranav Shyam, Pamela Mishkin, Bob Mcgrew, Ilya Sutskever, and Mark Chen. 2022. GLIDE: Towards Photorealistic Image Generation and Editing with Text-Guided Diffusion Models. In Proceedings of the International Conference on Machine Learning. PMLR, 16784--16804."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_25_1","DOI":"10.1145\/3588432.3591513"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_26_1","DOI":"10.1109\/ICCV51070.2023.00387"},{"key":"e_1_3_2_1_27_1","volume-title":"Proceedings of the International Conference on Learning Representations","author":"Podell Dustin","year":"2023","unstructured":"Dustin Podell, Zion English, Kyle Lacey, Andreas Blattmann, Tim Dockhorn, Jonas M\u00fcller, Joe Penna, and Robin Rombach. 2023. Sdxl: Improving latent diffusion models for high-resolution image synthesis. Proceedings of the International Conference on Learning Representations (2023)."},{"key":"e_1_3_2_1_28_1","volume-title":"Proceedings of the International Conference on Machine Learning. PMLR, 8748-- 8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In Proceedings of the International Conference on Machine Learning. PMLR, 8748-- 8763."},{"key":"e_1_3_2_1_29_1","volume-title":"Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125 1, 2","author":"Ramesh Aditya","year":"2022","unstructured":"Aditya Ramesh, Prafulla Dhariwal, Alex Nichol, Casey Chu, and Mark Chen. 2022. Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125 1, 2 (2022), 3."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_30_1","DOI":"10.1109\/CVPR52688.2022.01042"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_31_1","DOI":"10.1145\/3528233.3530757"},{"key":"e_1_3_2_1_32_1","first-page":"36479","article-title":"Photorealistic text-to-image diffusion models with deep language understanding","volume":"35","author":"Saharia Chitwan","year":"2022","unstructured":"Chitwan Saharia, William Chan, Saurabh Saxena, Lala Li, Jay Whang, Emily L Denton, Kamyar Ghasemipour, Raphael Gontijo Lopes, Burcu Karagol Ayan, Tim Salimans, et al. 2022. Photorealistic text-to-image diffusion models with deep language understanding. Proceedings of the Advances in Neural Information Processing Systems 35 (2022), 36479--36494.","journal-title":"Proceedings of the Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_33_1","first-page":"4713","article-title":"Image super-resolution via iterative refinement","volume":"45","author":"Saharia Chitwan","year":"2022","unstructured":"Chitwan Saharia, Jonathan Ho, William Chan, Tim Salimans, David J Fleet, and Mohammad Norouzi. 2022. Image super-resolution via iterative refinement. IEEE Transactions on Pattern Analysis and Machine Intelligence 45, 4 (2022), 4713--4726.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_34_1","DOI":"10.1109\/CVPR52733.2024.00453"},{"key":"e_1_3_2_1_35_1","volume-title":"Proceedings of the International Conference on Learning Representations","author":"Song Jiaming","year":"2021","unstructured":"Jiaming Song, Chenlin Meng, and Stefano Ermon. 2021. Denoising diffusion implicit models. Proceedings of the International Conference on Learning Representations (2021)."},{"key":"e_1_3_2_1_36_1","volume-title":"Design Booster: A Text- Guided Diffusion Model for Image Translation with Spatial Layout Preservation. arXiv preprint arXiv:2302.02284","author":"Sun Shiqi","year":"2023","unstructured":"Shiqi Sun, Shancheng Fang, Qian He, and Wei Liu. 2023. Design Booster: A Text- Guided Diffusion Model for Image Translation with Spatial Layout Preservation. arXiv preprint arXiv:2302.02284 (2023)."},{"key":"e_1_3_2_1_37_1","volume-title":"Diffss: Diffusion model for few-shot semantic segmentation. arXiv preprint arXiv:2307.00773","author":"Tan Weimin","year":"2023","unstructured":"Weimin Tan, Siyuan Chen, and Bo Yan. 2023. Diffss: Diffusion model for few-shot semantic segmentation. arXiv preprint arXiv:2307.00773 (2023)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_38_1","DOI":"10.1109\/CVPR52688.2022.01048"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_39_1","DOI":"10.1109\/CVPR52729.2023.00191"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_40_1","DOI":"10.1109\/ICCV48922.2021.00427"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_41_1","DOI":"10.1109\/ICCV51070.2023.00355"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_42_1","DOI":"10.1109\/CVPR.2018.00068"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_43_1","DOI":"10.1109\/CVPR52729.2023.00584"}],"event":{"sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"acronym":"MM '24","name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia"},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681380","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681380","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:44Z","timestamp":1750295864000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681380"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":43,"alternative-id":["10.1145\/3664647.3681380","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681380","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}