{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T15:26:37Z","timestamp":1781018797000,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":51,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,3,23]],"date-time":"2026-03-23T00:00:00Z","timestamp":1774224000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/501100003725","name":"National Research Foundation of Korea","doi-asserted-by":"publisher","award":["RS-2025-24535423"],"award-info":[{"award-number":["RS-2025-24535423"]}],"id":[{"id":"10.13039\/501100003725","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100014188","name":"Ministry of Science and ICT, South Korea","doi-asserted-by":"publisher","award":["RS-2020-II201373"],"award-info":[{"award-number":["RS-2020-II201373"]}],"id":[{"id":"10.13039\/501100014188","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,3,23]]},"DOI":"10.1145\/3748522.3779967","type":"proceedings-article","created":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T14:17:49Z","timestamp":1781014669000},"page":"1020-1028","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Analyzing Coarse-to-fine Generation of Diffusion Models through Image Editing Perspectives"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-8506-969X","authenticated-orcid":false,"given":"Seonggyeom","family":"Kim","sequence":"first","affiliation":[{"name":"Hanyang University, Seoul, Republic of Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-6832-5921","authenticated-orcid":false,"given":"Minju","family":"Kim","sequence":"additional","affiliation":[{"name":"Hanyang University, Seoul, Republic of Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-7334-2485","authenticated-orcid":false,"given":"Minju","family":"Bang","sequence":"additional","affiliation":[{"name":"Hanyang University, Seoul, Republic of Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5410-6391","authenticated-orcid":false,"given":"Dong-Kyu","family":"Chae","sequence":"additional","affiliation":[{"name":"Computer Science, Hanyang University, Seoul, Republic of Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,9]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01767"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"crossref","unstructured":"David Bau et al. 2017. Network dissection: quantifying interpretability of deep visual representations. In CVPR 6541\u20136549.","DOI":"10.1109\/CVPR.2017.354"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","unstructured":"Jooyoung Choi et al. 2021. Ilvr: conditioning method for denoising diffusion probabilistic models. ICCV.","DOI":"10.1109\/ICCV48922.2021.01410"},{"key":"e_1_3_2_1_4_1","unstructured":"Jooyoung Choi et al. 2022. Perception prioritized training of diffusion models. In CVPR 11472\u201311481."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"crossref","unstructured":"Yunjey Choi et al. 2020. Stargan v2: diverse image synthesis for multiple domains. In CVPR 8188\u20138197.","DOI":"10.1109\/CVPR42600.2020.00821"},{"key":"e_1_3_2_1_6_1","unstructured":"Prafulla Dhariwal and Alexander Nichol. 2021. Diffusion models beat gans on image synthesis. In NeurIPS 8780\u20138794."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","unstructured":"Patrick Esser et al. 2023. Structure and content-guided video synthesis with diffusion models. arXiv preprint arXiv:2302.03011.","DOI":"10.1109\/ICCV51070.2023.00675"},{"key":"e_1_3_2_1_8_1","unstructured":"Amir Hertz et al. 2023. Prompt-to-prompt image editing with cross-attention control. In ICLR."},{"key":"e_1_3_2_1_9_1","unstructured":"Miko\u0142aj Bi\u0144kowski et al. 2018. Demystifying mmd gans. In ICLR."},{"key":"e_1_3_2_1_10_1","unstructured":"Wan-Cyuan Fan et al. 2023. Frido: feature pyramid diffusion for complex scene image synthesis. In AAAI."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3422622"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","unstructured":"Zhenliang He Meina Kan and Shiguang Shan. 2021. Eigengan: layer-wise eigen-learning for gans. In ICCV 14408\u201314417.","DOI":"10.1109\/ICCV48922.2021.01414"},{"key":"e_1_3_2_1_13_1","unstructured":"Martin Heusel et al. 2017. Gans trained by a two time-scale update rule converge to a local nash equilibrium. In NeurIPS."},{"key":"e_1_3_2_1_14_1","unstructured":"Jonathan Ho Ajay Jain and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. In NeurIPS 6840\u20136851."},{"key":"e_1_3_2_1_15_1","first-page":"1","article-title":"Cascaded diffusion models for high fidelity image generation","volume":"23","author":"Jonathan Ho","year":"2022","unstructured":"Jonathan Ho et al. 2022. Cascaded diffusion models for high fidelity image generation. J. Mach. Learn. Res., 23, 47, 1\u201333.","journal-title":"J. Mach. Learn. Res."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","unstructured":"Jonathan Ho et al. 2022. Video diffusion models. In NeurIPS.","DOI":"10.52202\/068431-0628"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"crossref","unstructured":"Zhiyu Jin et al. 2023. Training-free diffusion model adaptation for variable-sized text-to-image synthesis. arXiv preprint arXiv:2306.08645.","DOI":"10.52202\/075280-3103"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"Tero Karras Samuli Laine and Timo Aila. 2019. A style-based generator architecture for generative adversarial networks. In CVPR 4401\u20134410.","DOI":"10.1109\/CVPR.2019.00453"},{"key":"e_1_3_2_1_19_1","unstructured":"Tero Karras et al. 2022. Elucidating the design space of diffusion-based generative models. In NeurIPS."},{"key":"e_1_3_2_1_20_1","unstructured":"Tero Karras et al. 2020. Training generative adversarial networks with limited data. In NeurIPS 12104\u201312114."},{"key":"e_1_3_2_1_21_1","volume-title":"International Conference on Database Systems for Advanced Applications, 495\u2013504","author":"Kim Minju","year":"2024","unstructured":"Minju Kim, Seonggyeom Kim, and Dong-Kyu Chae. 2024. Unsupervised controllable generation of diffusion models with latent variables in vaes. In International Conference on Database Systems for Advanced Applications, 495\u2013504."},{"key":"e_1_3_2_1_22_1","unstructured":"Diederik Kingma et al. 2021. Variational diffusion models. In NeurIPS 21696\u201321707."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"crossref","unstructured":"Gihyun Kwon and Jong Chul Ye. 2021. Diagonal attention and style-based gan for content-style disentanglement in image generation and translation. In ICCV 13980\u201313989.","DOI":"10.1109\/ICCV48922.2021.01372"},{"key":"e_1_3_2_1_24_1","unstructured":"Mingi Kwon Jaeseok Jeong and Youngjung Uh. 2023. Diffusion models already have a semantic latent space. In ICLR."},{"key":"e_1_3_2_1_25_1","volume-title":"NeurIPS 2022 Workshop on Score-Based Methods.","author":"Sangyun","unstructured":"Sangyun Lee et al. 2022. Progressive deblurring of diffusion models for coarse-to-fine image synthesis. In NeurIPS 2022 Workshop on Score-Based Methods."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"crossref","unstructured":"Nan Liu et al. 2022. Compositional visual generation with composable diffusion models. In ECCV 423\u2013439.","DOI":"10.1007\/978-3-031-19790-1_26"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"crossref","unstructured":"Ziwei Liu et al. 2015. Deep learning face attributes in the wild. In CVPR 3730\u20133738.","DOI":"10.1109\/ICCV.2015.425"},{"key":"e_1_3_2_1_28_1","unstructured":"Chenlin Meng et al. 2021. Sdedit: guided image synthesis and editing with stochastic differential equations. In ICLR."},{"key":"e_1_3_2_1_29_1","unstructured":"Alexander Quinn Nichol and Prafulla Dhariwal. 2021. Improved denoising diffusion probabilistic models. In ICML 8162\u20138171."},{"key":"e_1_3_2_1_30_1","unstructured":"Alexander Nichol et al. 2022. Glide: towards photorealistic image generation and editing with text-guided diffusion models. In ICML 16784\u201316804."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"crossref","unstructured":"Gaurav Parmar et al. 2023. Zero-shot image-to-image translation. arXiv preprint arXiv:2302.03027.","DOI":"10.1145\/3588432.3591513"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"crossref","unstructured":"Konpat Preechakul et al. 2022. Diffusion autoencoders: toward a meaningful and decodable representation. In CVPR 10619\u201310629.","DOI":"10.1109\/CVPR52688.2022.01036"},{"key":"e_1_3_2_1_33_1","unstructured":"Alec Radford et al. 2021. Learning transferable visual models from natural language supervision. In ICML 8748\u20138763."},{"key":"e_1_3_2_1_34_1","unstructured":"Aditya Ramesh et al. 2022. Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"crossref","unstructured":"Elad Richardson et al. 2021. Encoding in style: a stylegan encoder for image-to-image translation. In CVPR 2287\u20132296.","DOI":"10.1109\/CVPR46437.2021.00232"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"crossref","unstructured":"Robin Rombach et al. 2022. High-resolution image synthesis with latent diffusion models. In CVPR 10684\u201310695.","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"crossref","unstructured":"Robin Rombach et al. 2022. High-resolution image synthesis with latent diffusion models. In CVPR 10684\u201310695.","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_38_1","volume-title":"ACM SIGGRAPH","author":"Chitwan","year":"2022","unstructured":"Chitwan Saharia et al. 2022. Palette: image-to-image diffusion models. In ACM SIGGRAPH 2022, 1\u201310."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"crossref","unstructured":"Chitwan Saharia et al. 2022. Photorealistic text-to-image diffusion models with deep language understanding. In NeurIPS 36479\u201336494.","DOI":"10.52202\/068431-2643"},{"key":"e_1_3_2_1_40_1","unstructured":"Hiroshi Sasaki Chris G Willcocks and Toby P Breckon. 2021. Unit-ddpm: unpaired image translation with denoising diffusion probabilistic models. arXiv preprint arXiv:2104.05358."},{"key":"e_1_3_2_1_41_1","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556."},{"key":"e_1_3_2_1_42_1","unstructured":"Jascha Sohl-Dickstein et al. 2015. Deep unsupervised learning using nonequilibrium thermodynamics. In ICML 2256\u20132265."},{"key":"e_1_3_2_1_43_1","unstructured":"Jiaming Song Chenlin Meng and Stefano Ermon. 2021. Denoising diffusion implicit models. In ICLR."},{"key":"e_1_3_2_1_44_1","unstructured":"Xuan Su et al. 2022. Dual diffusion implicit bridges for image-to-image translation. In ICLR."},{"key":"e_1_3_2_1_45_1","unstructured":"Tengfei Wang et al. 2022. Pretraining is all you need for image-to-image translation. arXiv preprint arXiv:2205.12952."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"crossref","unstructured":"Shaoan Xie et al. 2023. Smartbrush: text and shape guided object inpainting with diffusion model. In CVPR 22428\u201322437.","DOI":"10.1109\/CVPR52729.2023.02148"},{"key":"e_1_3_2_1_47_1","unstructured":"Fisher Yu et al. 2015. Lsun: construction of a large-scale image dataset using deep learning with humans in the loop. arXiv preprint arXiv:1506.03365."},{"key":"e_1_3_2_1_48_1","unstructured":"Jiwen Yu et al. 2023. Cross: diffusion model makes controllable robust and secure image steganography. arXiv preprint arXiv:2305.16936."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"crossref","unstructured":"Matthew D Zeiler and Rob Fergus. 2014. Visualizing and understanding convolutional networks. In ECCV 818\u2013833.","DOI":"10.1007\/978-3-319-10590-1_53"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"crossref","unstructured":"Richard Zhang et al. 2018. The unreasonable effectiveness of deep features as a perceptual metric. In CVPR 586\u2013595.","DOI":"10.1109\/CVPR.2018.00068"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"crossref","unstructured":"Jiapeng Zhu et al. 2020. In-domain gan inversion for real image editing. In ECCV 592\u2013608.","DOI":"10.1007\/978-3-030-58520-4_35"}],"event":{"name":"SAC '26: 41st ACM\/SIGAPP Symposium on Applied Computing","location":"Grand Hotel Palace Thessaloniki Greece","acronym":"SAC '26","sponsor":["SIGAPP ACM Special Interest Group on Applied Computing"]},"container-title":["Proceedings of the 41st ACM\/SIGAPP Symposium on Applied Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3748522.3779967","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T14:33:32Z","timestamp":1781015612000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3748522.3779967"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,23]]},"references-count":51,"alternative-id":["10.1145\/3748522.3779967","10.1145\/3748522"],"URL":"https:\/\/doi.org\/10.1145\/3748522.3779967","relation":{},"subject":[],"published":{"date-parts":[[2026,3,23]]},"assertion":[{"value":"2026-06-09","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}