{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,8]],"date-time":"2026-02-08T09:35:43Z","timestamp":1770543343333,"version":"3.49.0"},"reference-count":57,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2025,2,1]],"date-time":"2025-02-01T00:00:00Z","timestamp":1738368000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,2,1]],"date-time":"2025-02-01T00:00:00Z","timestamp":1738368000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,2,1]],"date-time":"2025-02-01T00:00:00Z","timestamp":1738368000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Science Foundation of China","doi-asserted-by":"publisher","award":["U20B2070"],"award-info":[{"award-number":["U20B2070"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Science Foundation of China","doi-asserted-by":"publisher","award":["61832016"],"award-info":[{"award-number":["61832016"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Science Foundation of China","doi-asserted-by":"publisher","award":["62102162"],"award-info":[{"award-number":["62102162"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004826","name":"Beijing Natural Science Foundation","doi-asserted-by":"publisher","award":["L221013"],"award-info":[{"award-number":["L221013"]}],"id":[{"id":"10.13039\/501100004826","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Neural Netw. Learning Syst."],"published-print":{"date-parts":[[2025,2]]},"DOI":"10.1109\/tnnls.2023.3342645","type":"journal-article","created":{"date-parts":[[2024,1,10]],"date-time":"2024-01-10T15:08:27Z","timestamp":1704899307000},"page":"3370-3383","source":"Crossref","is-referenced-by-count":34,"title":["DiffStyler: Controllable Dual Diffusion for Text-Driven Image Stylization"],"prefix":"10.1109","volume":"36","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1627-6584","authenticated-orcid":false,"given":"Nisha","family":"Huang","sequence":"first","affiliation":[{"name":"State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6433-2678","authenticated-orcid":false,"given":"Yuxin","family":"Zhang","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3975-2483","authenticated-orcid":false,"given":"Fan","family":"Tang","sequence":"additional","affiliation":[{"name":"Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}]},{"given":"Chongyang","family":"Ma","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}]},{"given":"Haibin","family":"Huang","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6502-145X","authenticated-orcid":false,"given":"Weiming","family":"Dong","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8343-9665","authenticated-orcid":false,"given":"Changsheng","family":"Xu","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Multimodal Artificial Intelligence Systems (MAIS), Institute of Automation, Chinese Academy of Sciences, Beijing, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.265"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.167"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2022.3143356"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3057892"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/tnnls.2022.3230084"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/tnnls.2022.3233025"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00453"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00193"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00209"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3528223.3530164"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01753"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3422622"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2017.2765202"},{"key":"ref14","first-page":"8780","article-title":"Diffusion models beat GANs on image synthesis","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","author":"Dhariwal"},{"key":"ref15","article-title":"Hierarchical text-conditional image generation with clip latents","author":"Ramesh","year":"2022","journal-title":"arXiv:2204.06125"},{"key":"ref16","article-title":"GLIDE: Towards photorealistic image generation and editing with text-guided diffusion models","author":"Nichol","year":"2021","journal-title":"arXiv:2112.10741"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548282"},{"key":"ref19","article-title":"Creative painting with latent diffusion models","author":"Wu","year":"2022","journal-title":"arXiv:2209.14697"},{"key":"ref20","article-title":"An image is worth one word: Personalizing text-to-image generation using textual inversion","author":"Gal","year":"2022","journal-title":"arXiv:2208.01618"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20059-5_41"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00092"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01104"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2022.3149237"},{"key":"ref25","volume-title":"Disco Diffusion","author":"Diffusion","year":"2022"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1603.08155"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00510"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/3528233.3530736"},{"key":"ref30","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Radford"},{"key":"ref31","first-page":"2256","article-title":"Deep unsupervised learning using nonequilibrium thermodynamics","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Sohl-Dickstein"},{"key":"ref32","first-page":"11895","article-title":"Generative modeling by estimating gradients of the data distribution","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NIPS)","author":"Song"},{"key":"ref33","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","author":"Ho"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00246"},{"key":"ref35","first-page":"36479","article-title":"Photorealistic text-to-image diffusion models with deep language understanding","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NIPS)","volume":"35","author":"Saharia"},{"key":"ref36","first-page":"8162","article-title":"Improved denoising diffusion probabilistic models","volume-title":"Proc. Int. Conf. Mach. Learn. (ICML)","author":"Nichol"},{"key":"ref37","article-title":"Denoising diffusion implicit models","volume-title":"Proc. Int. Conf. Learn. Represent. (ICLR)","author":"Song"},{"key":"ref38","article-title":"Pseudo numerical methods for diffusion models on manifolds","volume-title":"Proc. Int. Conf. Learn. Represent. (ICLR)","author":"Liu"},{"key":"ref39","article-title":"Video ControlNet: Towards temporally consistent synthetic-to-real video translation using conditional image diffusion models","author":"Chu","year":"2023","journal-title":"arXiv:2305.19193"},{"key":"ref40","article-title":"PFB-Diff: Progressive feature blending diffusion for text-driven image editing","author":"Huang","year":"2023","journal-title":"arXiv:2306.16894"},{"key":"ref41","first-page":"10209","article-title":"Retraction notice: VideoFusion: Decomposed diffusion models for high-quality video generation","volume-title":"Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit. (CVPR)","author":"Luo"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00356"},{"key":"ref43","volume-title":"V-Diffusion-PyTorch","author":"Katherine","year":"2022"},{"key":"ref44","volume-title":"Wikiart Dataset","author":"Volunteer Team","year":"2022"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58545-7_19"},{"key":"ref46","volume-title":"Simulacra Aesthetic Models","author":"Katherine","year":"2022"},{"key":"ref47","volume-title":"Simulacra Aesthetic Captions","author":"Pressman","year":"2022"},{"key":"ref48","article-title":"An image is worth 16\u00d716 words: Transformers for image recognition at scale","volume-title":"Proc. Int. Conf. Learn. Represent. (ICLR)","author":"Dosovitskiy"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.5244\/C.30.87"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00068"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00658"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i2.16208"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01755"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1145\/3605548"},{"key":"ref56","volume-title":"Clip Retrieval System","author":"Romain","year":"2023"},{"key":"ref57","article-title":"Universal style transfer via feature transforms","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NIPS)","volume":"30","author":"Li"}],"container-title":["IEEE Transactions on Neural Networks and Learning Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/5962385\/10877690\/10387416.pdf?arnumber=10387416","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,5]],"date-time":"2025-12-05T18:38:51Z","timestamp":1764959931000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10387416\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2]]},"references-count":57,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/tnnls.2023.3342645","relation":{},"ISSN":["2162-237X","2162-2388"],"issn-type":[{"value":"2162-237X","type":"print"},{"value":"2162-2388","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,2]]}}}