{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,4]],"date-time":"2025-12-04T07:01:34Z","timestamp":1764831694699,"version":"3.46.0"},"reference-count":28,"publisher":"Tech Science Press","issue":"2","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["CMC"],"published-print":{"date-parts":[[2025]]},"DOI":"10.32604\/cmc.2025.068500","type":"journal-article","created":{"date-parts":[[2025,9,5]],"date-time":"2025-09-05T08:51:17Z","timestamp":1757062277000},"page":"3965-3980","source":"Crossref","is-referenced-by-count":0,"title":["PolyDiffusion: A Multi-Objective Optimized Contour-to-Image Diffusion Framework"],"prefix":"10.32604","volume":"85","author":[{"given":"Yuzhen","family":"Liu","sequence":"first","affiliation":[]},{"given":"Jiasheng","family":"Yin","sequence":"additional","affiliation":[]},{"given":"Yixuan","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Jin","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Xiaolan","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Xiaoliang","family":"Wang","sequence":"additional","affiliation":[]}],"member":"17807","published-online":{"date-parts":[[2025]]},"reference":[{"key":"ref1","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"579","article-title":"Frido: feature pyramid diffusion for complex scene image synthesis","volume":"37","author":"Fan","year":"2023"},{"key":"ref2","doi-asserted-by":"crossref","first-page":"53","DOI":"10.1109\/MSP.2017.2765202","article-title":"Generative adversarial networks: an overview","volume":"35","author":"Creswell","year":"2018","journal-title":"IEEE Sig Process Magaz"},{"key":"ref3","series-title":"Forty-first International Conference on Machine Learning; 2024 Jul 21\u201327; Vienna, Austria","first-page":"12606","article-title":"Scaling rectified flow transformers for high-resolution image synthesis","author":"Esser"},{"key":"ref4","series-title":"Proceedings of the 2023 IEEE\/CVF International Conference on Computer Vision; 2023 Oct 1\u20136; Paris, France","first-page":"4195","article-title":"Scalable diffusion models with transformers","author":"Peebles"},{"key":"ref5","series-title":"Proceedings of the 2023 IEEE\/CVF International Conference on Computer Vision; 2023 Oct 1\u20136; Paris, France","first-page":"7226","article-title":"Layoutdiffusion: improving graphic layout generation by discrete diffusion probabilistic models","author":"Zhang"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"e12644","DOI":"10.1111\/coin.12644","article-title":"Contour wavelet diffusion: a fast and high-quality image generation model","volume":"40","author":"Ding","year":"2024","journal-title":"Comput Intell"},{"key":"ref7","unstructured":"Qin C, Zhang S, Yu N, Feng Y, Yang X, Zhou Y, et al. Unicontrol: a unified diffusion model for controllable visual generation in the wild. arXiv:2305.11147. 2023. doi:10.48550\/arXiv.2305.11147."},{"key":"ref8","doi-asserted-by":"crossref","unstructured":"Wang X, Darrell T, Rambhatla SS, Girdhar R, Misra I. InstanceDiffusion: instance-level control for image generation. arXiv:2402.03290. 2024. doi:10.48550\/arXiv.2402.03290.","DOI":"10.1109\/CVPR52733.2024.00596"},{"key":"ref9","series-title":"Proceedings of the 2024 IEEE\/CVF Winter Conference on Applications of Computer Vision; 2024 Jan 3\u20138; Waikoloa, HI, USA","first-page":"4240","article-title":"What decreases editing capability? Domain-specific hybrid refinement for improved GAN inversion","author":"Cao"},{"key":"ref10","unstructured":"Cao P, Yang L, Liu D, Liu Z, Li S, Song Q. Lsap: rethinking inversion fidelity, perception and editability in gan latent space. arXiv:2209.12746. 2022. doi:10.48550\/arXiv.2209.12746."},{"key":"ref11","unstructured":"Kingma DP, Welling M. Auto-encoding variational bayes. arXiv:1312.6114. 2013. doi:10.48550\/arXiv.1312.6114."},{"key":"ref12","unstructured":"Ramesh A, Dhariwal P, Nichol A, Chu C, Chen M. Hierarchical text-conditional image generation with clip latents.  arXiv:2204.06125. 2022. doi:10.48550\/arXiv.2204.06125."},{"key":"ref13","series-title":"Proceedings of the 2019 IEEE\/CVF International Conference on Computer Vision; 2019 Oct 27\u2013Nov 2; Seoul, Republic of Korea","first-page":"4561","article-title":"Specifying object attributes and relations in interactive scene generation","author":"Ashual","year":"2019"},{"key":"ref14","series-title":"Proceedings of the 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition; 2023 Jun 17\u201324; Vancouver, BC, Canada","first-page":"22428","article-title":"Smartbrush: text and shape guided object inpainting with diffusion model","author":"Xie"},{"key":"ref15","series-title":"Proceedings of the 2023 IEEE\/CVF International Conference on Computer Vision; 2023 Oct 1\u20136; Paris, France","first-page":"2174","article-title":"Zero-shot spatial layout conditioning for text-to-image diffusion models","author":"Couairon"},{"key":"ref16","series-title":"Advances in neural information processing systems","article-title":"Dataset diffusion: diffusion-based synthetic data generation for pixel-level semantic segmentation","author":"Nguyen","year":"2024"},{"key":"ref17","series-title":"Proceedings of the 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition; 2023 Jun 17\u201324; Vancouver, BC, Canada","first-page":"22490","article-title":"Layoutdiffusion controllable diffusion model for layout-to-image generation","author":"Zheng"},{"key":"ref18","doi-asserted-by":"crossref","unstructured":"Singh J, Zhang J, Liu Q, Smith C, Lin Z, Zheng L. Smartmask: context aware high-fidelity mask generation for fine-grained object insertion and layout control. arXiv:2312.05039. 2023. doi:10.48550\/arXiv.2312.05039.","DOI":"10.1109\/CVPR52733.2024.00621"},{"key":"ref19","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, et al. An image is worth 16x16 words: transformers for image recognition at scale. arXiv:2010.11929. 2020. doi:10.48550\/arXiv.2010.11929."},{"key":"ref20","series-title":"Proceedings of the 2021 IEEE\/CVF International Conference on Computer Vision; 2021 Oct 10\u201317; Montreal, QC, Canada","first-page":"9992","article-title":"Swin transformer: hierarchical vision transformer using shifted windows","author":"Liu"},{"key":"ref21","series-title":"Proceedings of the 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition; 2023 Jun 17\u201324; Vancouver, BC, Canada","first-page":"18653","article-title":"Polyformer: referring image segmentation as sequential polygon generation","author":"Liu"},{"key":"ref22","series-title":"Proceedings of the 2017 IEEE International Conference on Computer Vision; 2017 Oct 22\u201329; Venice, Italy","first-page":"1501","article-title":"Arbitrary style transfer in real-time with adaptive instance normalization","author":"Huang"},{"key":"ref23","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume":"33","author":"Ho","year":"2020","journal-title":"Adv Neural Inform Process Syst"},{"key":"ref24","series-title":"Proceedings of the 2018 IEEE Conference on Computer Vision and Pattern Recognition; 2018 Jun 18\u201323; Salt Lake City, UT, USA","first-page":"1209","article-title":"Coco-stuff: thing and stuff classes in context","author":"Caesar"},{"key":"ref25","unstructured":"Everingham M, Van Gool L, Williams C, Winn J, Zisserman A. The PASCAL visual object classes challenge 2012 (VOC2012) Results [Internet]. 2012 [cited 2025 Aug 6]. Available from: http:\/\/host.robots.ox.ac.uk\/pascal\/VOC\/voc2012\/."},{"key":"ref26","series-title":"Proceedings of the 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition; 2022 Jun 18\u201324; New Orleans, LA, USA","first-page":"7783","article-title":"Interactive image synthesis with panoptic layout generation","author":"Wang"},{"key":"ref27","doi-asserted-by":"crossref","first-page":"6248","DOI":"10.1109\/TIP.2023.3321465","article-title":"PLGAN: generative adversarial networks for power-line segmentation in aerial images","volume":"32","author":"Abdelfattah","year":"2023","journal-title":"IEEE Trans Image Process"},{"key":"ref28","unstructured":"Gu A, Dao T. Mamba: linear-time sequence modeling with selective state spaces. arXiv:2312.00752. 2023. doi:10.48550\/arXiv.2312.00752."}],"container-title":["Computers, Materials &amp; Continua"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/cdn.techscience.cn\/files\/cmc\/2025\/TSP_CMC-85-2\/TSP_CMC_68500\/TSP_CMC_68500.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,4]],"date-time":"2025-12-04T05:35:49Z","timestamp":1764826549000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.techscience.com\/cmc\/v85n2\/63844"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":28,"journal-issue":{"issue":"2","published-online":{"date-parts":[[2025]]},"published-print":{"date-parts":[[2025]]}},"URL":"https:\/\/doi.org\/10.32604\/cmc.2025.068500","relation":{},"ISSN":["1546-2226"],"issn-type":[{"type":"electronic","value":"1546-2226"}],"subject":[],"published":{"date-parts":[[2025]]}}}