{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,12]],"date-time":"2026-05-12T17:26:03Z","timestamp":1778606763134,"version":"3.51.4"},"reference-count":221,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"name":"Competitive Research Fund of the University of Aizu, Japan"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2024]]},"DOI":"10.1109\/access.2024.3435541","type":"journal-article","created":{"date-parts":[[2024,7,29]],"date-time":"2024-07-29T19:43:21Z","timestamp":1722282201000},"page":"178401-178440","source":"Crossref","is-referenced-by-count":10,"title":["Exploring Progress in Text-to-Image Synthesis: An In-Depth Survey on the Evolution of Generative Adversarial Networks"],"prefix":"10.1109","volume":"12","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5498-8339","authenticated-orcid":false,"given":"Md","family":"Ahsan Habib","sequence":"first","affiliation":[{"name":"Department of Computer Science and Engineering, Mawlana Bhashani Science and Technology University, Tangail, Bangladesh"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7344-0838","authenticated-orcid":false,"given":"Md","family":"Anwar Hussen Wadud","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, Mawlana Bhashani Science and Technology University, Tangail, Bangladesh"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1057-0472","authenticated-orcid":false,"given":"Md","family":"Fazlul Karim Patwary","sequence":"additional","affiliation":[{"name":"Institute of Information Technology, Jahangirnagar University, Savar, Dhaka, Bangladesh"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4417-8276","authenticated-orcid":false,"given":"Mohammad","family":"Motiur Rahman","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, Mawlana Bhashani Science and Technology University, Tangail, Bangladesh"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5738-1631","authenticated-orcid":false,"given":"M. F.","family":"Mridha","sequence":"additional","affiliation":[{"name":"Department of Computer Science, American International University-Bangladesh, Dhaka, Bangladesh"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8932-6477","authenticated-orcid":false,"given":"Yuichi","family":"Okuyama","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, The University of Aizu, Aizuwakamatsu, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7476-2468","authenticated-orcid":false,"given":"Jungpil","family":"Shin","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, The University of Aizu, Aizuwakamatsu, Japan"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/35090055"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.3390\/app13085098"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/s11704-022-2385-x"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.5555\/2969033.2969125"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.632"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.244"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.19"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.728"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00457"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2018.09.013"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.3390\/app12178398"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.265"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2019.2921336"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.50"},{"key":"ref15","first-page":"2779","article-title":"Large scale adversarial representation learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Donahue"},{"key":"ref16","article-title":"Conditional generative adversarial nets","author":"Mirza","year":"2014","journal-title":"arXiv:1411.1784"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3295748"},{"key":"ref18","first-page":"1060","article-title":"Generative adversarial text to image synthesis","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Reed"},{"key":"ref19","article-title":"The Caltech-UCSD birds-200-2011 dataset","author":"Wah","year":"2011"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICVGIP.2008.47"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.23919\/TST.2017.8195348"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2017.2765202"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/3301282"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.2982224"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1.11688"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1002\/widm.1345"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2021.07.019"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1111\/jebm.12141"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3261988"},{"key":"ref31","article-title":"GLIDE: Towards photorealistic image generation and editing with text-guided diffusion models","author":"Nichol","year":"2021","journal-title":"arXiv:2112.10741"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2023.3342866"},{"key":"ref33","article-title":"MNIST handwritten digit database","author":"LeCun","year":"2010"},{"key":"ref34","first-page":"2642","article-title":"Conditional image synthesis with auxiliary classifier GANs","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Odena"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.13"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1310.4546"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1080\/00437956.1954.11659520"},{"key":"ref38","article-title":"TAC-GAN-text conditioned auxiliary classifier generative adversarial network","author":"Dash","year":"2017","journal-title":"arXiv:1703.06412"},{"key":"ref39","first-page":"3294","article-title":"Skip-thought vectors","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Kiros"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.629"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN48605.2020.9207584"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1016\/j.jjimei.2022.100095"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/78.650093"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3134154"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00143"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00342"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58539-6_29"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2856256"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2023.03.023"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1145\/3569219.3569352"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2023.3268869"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00976"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01228-1_41"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00649"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33018312"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.618"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.106"},{"key":"ref58","article-title":"Very deep convolutional networks for large-scale image recognition","author":"Simonyan","year":"2014","journal-title":"arXiv:1409.1556"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/CRV.2019.00018"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref61","article-title":"Neural machine translation by jointly learning to align and translate","author":"Bahdanau","year":"2014","journal-title":"arXiv:1409.0473"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.5555\/3045118.3045336"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.3390\/app13010109"},{"key":"ref65","first-page":"284","article-title":"Realistic image generation using region-phrase attention","volume-title":"Proc. Asian Conf. Mach. Learn.","author":"Huang"},{"key":"ref66","article-title":"A structured self-attentive sentence embedding","author":"Lin","year":"2017","journal-title":"arXiv:1703.03130"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00046"},{"key":"ref68","first-page":"2065","article-title":"Controllable text-to-image generation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Li"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1142\/S0218001493000339"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2005.202"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00243"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2006.100"},{"key":"ref73","article-title":"A learned representation for artistic style","author":"Dumoulin","year":"2016","journal-title":"arXiv:1610.07629"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.01060"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.324"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33013272"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553380"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.374"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00160"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/ICMEW.2019.00085"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00766"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"ref83","article-title":"Adversarial feature learning","author":"Donahue","year":"2016","journal-title":"arXiv:1605.09782"},{"key":"ref84","article-title":"Adversarially learned inference","author":"Dumoulin","year":"2016","journal-title":"arXiv:1606.00704"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00537"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01766"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00595"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/p15-1150"},{"key":"ref89","first-page":"2440","article-title":"End-to-end memory networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"28","author":"Sukhbaatar"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1147"},{"key":"ref91","article-title":"Conditional image generation and manipulation for user-specified content","author":"Stap","year":"2020","journal-title":"arXiv:2005.04909"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00453"},{"key":"ref93","article-title":"Large scale GAN training for high fidelity natural image synthesis","author":"Brock","year":"2018","journal-title":"arXiv:1809.11096"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1016\/B978-0-323-96104-2.00014-2"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_42"},{"key":"ref96","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2019.2953753"},{"key":"ref97","article-title":"Progressive growing of GANs for improved quality, stability, and variation","author":"Karras","year":"2017","journal-title":"arXiv:1710.10196"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1109\/ICME46284.2020.9102904"},{"key":"ref99","article-title":"Network-to-network translation with conditional invertible neural networks","author":"Rombach","year":"2020","journal-title":"arXiv:2005.13580"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.32604\/csse.2023.027841"},{"key":"ref101","article-title":"NICE: Non-linear independent components estimation","author":"Dinh","year":"2014","journal-title":"arXiv:1410.8516"},{"key":"ref102","article-title":"Density estimation using real NVP","author":"Dinh","year":"2016","journal-title":"arXiv:1605.08803"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2019.00044"},{"key":"ref104","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01092"},{"key":"ref105","article-title":"ChatPainter: Improving text to image generation using dialogue","author":"Sharma","year":"2018","journal-title":"arXiv:1802.08216"},{"issue":"4","key":"ref106","first-page":"500","article-title":"Beyond simple classifiers: Analyzing and improving performance","volume":"6","author":"Geman","year":"1984","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.670"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1145\/3372278.3390684"},{"key":"ref109","article-title":"Generating multiple objects at spatially distinct locations","author":"Hinz","year":"2019","journal-title":"arXiv:1901.00686"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.285"},{"key":"ref111","article-title":"Leveraging visual question answering to improve text-to-image synthesis","author":"Frolov","year":"2020","journal-title":"arXiv:2010.14953"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00878"},{"key":"ref113","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.01063"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-35320-8_16"},{"key":"ref115","first-page":"1","article-title":"Generating interpretable images with controllable structure","volume-title":"Proc. ICLR","author":"Reed"},{"key":"ref116","first-page":"1747","article-title":"Pixel recurrent neural networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Van Den Oord"},{"key":"ref117","first-page":"2912","article-title":"Parallel multiscale autoregressive density estimation","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Reed"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00028"},{"key":"ref119","article-title":"Interactive image generation using scene graphs","author":"Mittal","year":"2019","journal-title":"arXiv:1905.03743"},{"key":"ref120","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i3.16368"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.3021209"},{"key":"ref122","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1238"},{"key":"ref123","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00833"},{"key":"ref124","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01245"},{"key":"ref125","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"ref126","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.169"},{"key":"ref127","first-page":"887","article-title":"Learn, imagine and create: Text-to-image generation from prior knowledge","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Qiao"},{"key":"ref128","doi-asserted-by":"publisher","DOI":"10.1145\/3391709"},{"key":"ref129","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00133"},{"key":"ref130","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.168"},{"key":"ref131","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00466"},{"key":"ref132","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58604-1_18"},{"key":"ref133","first-page":"3950","article-title":"PasteGAN: A semi-parametric method to generate image from scene graph","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Li"},{"key":"ref134","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00243"},{"key":"ref135","article-title":"A note on the evaluation of generative models","author":"Theis","year":"2015","journal-title":"arXiv:1511.01844"},{"key":"ref136","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00498"},{"key":"ref137","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1759"},{"key":"ref138","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2020.09.002"},{"key":"ref139","first-page":"2784","article-title":"Network-to-network translation with conditional invertible neural networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Rombach"},{"key":"ref140","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00089"},{"key":"ref141","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58548-8_29"},{"key":"ref142","article-title":"Unsupervised representation learning with deep convolutional generative adversarial networks","author":"Radford","year":"2015","journal-title":"arXiv:1511.06434"},{"key":"ref143","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00813"},{"key":"ref144","article-title":"Towards principled methods for training generative adversarial networks","author":"Arjovsky","year":"2017","journal-title":"arXiv:1701.04862"},{"key":"ref145","article-title":"GANs trained by a two time-scale update rule converge to a local Nash equilibrium","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Heusel"},{"key":"ref146","doi-asserted-by":"publisher","DOI":"10.29099\/ijair.v2i1.42"},{"key":"ref147","first-page":"1","article-title":"Advances in neural information processing systems 28","volume-title":"Proc. 29th Annu. Conf. Neural Inf. Process. Syst.","author":"Cortes"},{"key":"ref148","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.308"},{"key":"ref149","article-title":"A note on the inception score","author":"Barratt","year":"2018","journal-title":"arXiv:1801.01973"},{"key":"ref150","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2018.10.009"},{"key":"ref151","article-title":"Demystifying MMD GANs","author":"Bi\u0144kowski","year":"2018","journal-title":"arXiv:1801.01401"},{"key":"ref152","first-page":"8124","article-title":"Classification accuracy score for conditional generative models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Ravuri"},{"key":"ref153","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"ref154","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073135"},{"key":"ref155","doi-asserted-by":"publisher","DOI":"10.3115\/1626355.1626389"},{"key":"ref156","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"ref157","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00687"},{"key":"ref158","article-title":"Text to image synthesis using generative adversarial network","author":"Tan","year":"2022"},{"key":"ref159","doi-asserted-by":"publisher","DOI":"10.1109\/ACCAI53970.2022.9752641"},{"key":"ref160","doi-asserted-by":"publisher","DOI":"10.24996\/ijs.2022.63.3.37"},{"key":"ref161","doi-asserted-by":"publisher","DOI":"10.14569\/IJACSA.2021.0120124"},{"key":"ref162","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018","journal-title":"arXiv:1810.04805"},{"key":"ref163","doi-asserted-by":"publisher","DOI":"10.1007\/s11831-021-09672-w"},{"key":"ref164","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D15-1166"},{"key":"ref165","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1312.6114"},{"key":"ref166","first-page":"10600","article-title":"Generating diverse high-fidelity images with VQ-VAE-2","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Razavi"},{"key":"ref167","first-page":"4866","article-title":"Conditional image generation with PixelCNN decoders","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"29","author":"Van den Oord"},{"key":"ref168","article-title":"Generating high fidelity images with subscale pixel networks and multidimensional upscaling","author":"Menick","year":"2018","journal-title":"arXiv:1812.01608"},{"issue":"4","key":"ref169","first-page":"695","article-title":"Estimation of non-normalized statistical models by score matching","volume":"6","author":"Hyv\u00e4rinen","year":"2005","journal-title":"J. Mach. Learn. Res."},{"key":"ref170","first-page":"3233","article-title":"Generative modeling by estimating gradients of the data distribution","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Song"},{"key":"ref171","article-title":"Adversarial score matching and improved sampling for image generation","author":"Jolicoeur-Martineau","year":"2020","journal-title":"arXiv:2009.05475"},{"key":"ref172","article-title":"Score-based generative modeling through stochastic differential equations","author":"Song","year":"2020","journal-title":"arXiv:2011.13456"},{"key":"ref173","first-page":"4055","article-title":"Image transformer","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Parmar"},{"key":"ref174","doi-asserted-by":"publisher","DOI":"10.1109\/ICAC3N53548.2021.9725511"},{"key":"ref175","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01268"},{"issue":"14","key":"ref176","first-page":"71","article-title":"Dall-e: Creating images from text","volume":"8","author":"Reddy","year":"2021","journal-title":"UGC Care Group I J."},{"key":"ref177","doi-asserted-by":"publisher","DOI":"10.1007\/s41095-021-0234-8"},{"key":"ref178","first-page":"1","article-title":"An overview of controllable image synthesis: Current challenges and future trends","volume":"1","author":"Huang","year":"2022","journal-title":"SSRN J."},{"key":"ref179","doi-asserted-by":"publisher","DOI":"10.1145\/3132515.3132522"},{"key":"ref180","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.eacl-main.249"},{"key":"ref181","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2018.8451656"},{"key":"ref182","first-page":"5228","article-title":"Assessing generative models via precision and recall","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"31","author":"Sajjadi"},{"key":"ref183","first-page":"3004","article-title":"Improved precision and recall metric for assessing generative models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Kynk\u00e4\u00e4nniemi"},{"key":"ref184","first-page":"7176","article-title":"Reliable fidelity and diversity metrics for generative models","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Naeem"},{"key":"ref185","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00068"},{"key":"ref186","article-title":"On the evaluation of conditional GANs","author":"DeVries","year":"2019","journal-title":"arXiv:1907.08175"},{"key":"ref187","first-page":"5517","article-title":"Hype: A benchmark for human eye perceptual evaluation of generative models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Zhou"},{"key":"ref188","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_21"},{"key":"ref189","article-title":"Semantic photo manipulation with a generative image prior","author":"Bau","year":"2020","journal-title":"arXiv:2005.07727"},{"key":"ref190","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.608"},{"key":"ref191","first-page":"2850","article-title":"Text-adaptive generative adversarial networks: Manipulating images with natural language","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"31","author":"Nam"},{"key":"ref192","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413505"},{"key":"ref193","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3414017"},{"key":"ref194","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00790"},{"key":"ref195","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1951"},{"key":"ref196","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/276"},{"key":"ref197","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12233"},{"key":"ref198","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2024.3370848"},{"key":"ref199","article-title":"Generative enhancement for 3D medical images","author":"Zhu","year":"2024","journal-title":"arXiv:2403.12852"},{"key":"ref200","first-page":"772","article-title":"Doodlebot: An educational robot for creativity and AI literacy","volume-title":"Proc. ACM\/IEEE Int. Conf. Human-Robot Interact.","author":"Williams"},{"key":"ref201","doi-asserted-by":"publisher","DOI":"10.1145\/3640544.3645227"},{"key":"ref202","doi-asserted-by":"publisher","DOI":"10.1016\/j.envsoft.2023.105896"},{"key":"ref203","doi-asserted-by":"publisher","DOI":"10.3233\/JCC240004"},{"key":"ref204","doi-asserted-by":"publisher","DOI":"10.1109\/jstsp.2024.3368661"},{"key":"ref205","doi-asserted-by":"publisher","DOI":"10.1016\/j.imu.2021.100603"},{"key":"ref206","doi-asserted-by":"publisher","DOI":"10.1109\/ECCE57851.2023.10101636"},{"key":"ref207","doi-asserted-by":"publisher","DOI":"10.1109\/ICCIT60459.2023.10441585"},{"key":"ref208","doi-asserted-by":"publisher","DOI":"10.1109\/SII58957.2024.10417370"},{"key":"ref209","doi-asserted-by":"publisher","DOI":"10.19139\/soic-2310-5070-1896"},{"key":"ref210","doi-asserted-by":"publisher","DOI":"10.53799\/ajse.v22i3.781"},{"key":"ref211","doi-asserted-by":"publisher","DOI":"10.1007\/s40200-023-01321-2"},{"key":"ref212","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-023-18096-6"},{"key":"ref213","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2024.127550"},{"key":"ref214","doi-asserted-by":"publisher","DOI":"10.1029\/2023SW003499"},{"key":"ref215","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2024.3350983"},{"key":"ref216","doi-asserted-by":"publisher","DOI":"10.1109\/NCIM59001.2023.10212608"},{"key":"ref217","doi-asserted-by":"publisher","DOI":"10.1109\/NCIM59001.2023.10212454"},{"key":"ref218","doi-asserted-by":"publisher","DOI":"10.1109\/ICICT4SD59951.2023.10303496"},{"key":"ref219","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.121255"},{"key":"ref220","doi-asserted-by":"publisher","DOI":"10.1109\/JBHI.2024.3364910"},{"key":"ref221","doi-asserted-by":"publisher","DOI":"10.1007\/s44336-024-00002-9"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6287639\/10380310\/10614157.pdf?arnumber=10614157","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,11]],"date-time":"2024-12-11T11:08:27Z","timestamp":1733915307000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10614157\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"references-count":221,"URL":"https:\/\/doi.org\/10.1109\/access.2024.3435541","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]}}}