{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T05:30:02Z","timestamp":1774589402321,"version":"3.50.1"},"reference-count":133,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2024,10,5]],"date-time":"2024-10-05T00:00:00Z","timestamp":1728086400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,5]],"date-time":"2024-10-05T00:00:00Z","timestamp":1728086400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SN COMPUT. SCI."],"DOI":"10.1007\/s42979-024-03289-z","type":"journal-article","created":{"date-parts":[[2024,10,5]],"date-time":"2024-10-05T05:01:45Z","timestamp":1728104505000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Text Conditioned Generative Adversarial Networks Generating Images and Videos: A Critical Review"],"prefix":"10.1007","volume":"5","author":[{"given":"Rayeesa","family":"Mehmood","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6656-005X","authenticated-orcid":false,"given":"Rumaan","family":"Bashir","sequence":"additional","affiliation":[]},{"given":"Kaiser J.","family":"Giri","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,10,5]]},"reference":[{"key":"3289_CR1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cosrev.2020.100285","volume":"38","author":"G Harshvardhan","year":"2020","unstructured":"Harshvardhan G, Gourisaria MK, Pandey M, Rautaray SS. A comprehensive survey and analysis of generative models in machine learning. Comput Sci Rev. 
2020;38: 100285.","journal-title":"Comput Sci Rev"},{"key":"3289_CR2","doi-asserted-by":"crossref","unstructured":"Fischer A, Igel C. An introduction to restricted Boltzmann machines. In: Iberoamerican congress on pattern recognition. Springer. 2012. pp. 14\u201336","DOI":"10.1007\/978-3-642-33275-3_2"},{"issue":"5","key":"3289_CR3","doi-asserted-by":"publisher","first-page":"5947","DOI":"10.4249\/scholarpedia.5947","volume":"4","author":"GE Hinton","year":"2009","unstructured":"Hinton GE. Deep belief networks. Scholarpedia. 2009;4(5):5947.","journal-title":"Scholarpedia"},{"key":"3289_CR4","unstructured":"Salakhutdinov R, Larochelle H. Efficient learning of deep boltzmann machines. In: Proceedings of the thirteenth international conference on artificial intelligence and statistics, JMLR workshop and conference proceedings. 2010 pp. 693\u2013700"},{"key":"3289_CR5","doi-asserted-by":"crossref","unstructured":"Oussidi A, Elhassouny A. Deep generative models: survey. In: 2018 international conference on intelligent systems and computer vision (ISCV), IEEE. 2018. pp. 1\u20138","DOI":"10.1109\/ISACV.2018.8354080"},{"key":"3289_CR6","unstructured":"Kingma DP, Welling M. Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114. 2013"},{"key":"3289_CR7","first-page":"1","volume":"2","author":"N Aldausari","year":"2022","unstructured":"Aldausari N, Sowmya A, Marcus N, Mohammadi G. Video generative adversarial networks: a review. ACM Comput Surveys. 2022;2:1\u201325.","journal-title":"ACM Comput Surveys"},{"issue":"2","key":"3289_CR8","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3474838","volume":"13","author":"N Gao","year":"2022","unstructured":"Gao N, Xue H, Shao W, Zhao S, Qin KK, Prabowo A, Rahaman MS, Salim FD. Generative adversarial networks for spatio- temporal data: a survey. ACM Trans Intell Syst Technol. 
2022;13(2):1\u201325.","journal-title":"ACM Trans Intell Syst Technol"},{"key":"3289_CR9","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1406.2661","author":"I Goodfellow","year":"2014","unstructured":"Goodfellow I, Pouget-Abadie J, Mirza M, Xu B, Warde-Farley D, Ozair S, Courville A, Bengio Y. Generative adversarial nets. Adv Neural Inf Process Syst. 2014. https:\/\/doi.org\/10.48550\/arXiv.1406.2661.","journal-title":"Adv Neural Inf Process Syst"},{"issue":"2","key":"3289_CR10","first-page":"48","volume":"121","author":"M Giles","year":"2018","unstructured":"Giles M. The ganfather: the man who\u2019s given machines the gift of imagination. MIT Technol Rev. 2018;121(2):48\u201353.","journal-title":"MIT Technol Rev"},{"issue":"8","key":"3289_CR11","first-page":"1455","volume":"7","author":"P Jain","year":"2020","unstructured":"Jain P, Jayaswal T. Generative adversarial training and its utilization for text to image generation: a survey and analysis. J Crit Rev. 2020;7(8):1455\u201363.","journal-title":"J Crit Rev"},{"issue":"4","key":"3289_CR12","doi-asserted-by":"publisher","first-page":"1345","DOI":"10.1002\/widm.1345","volume":"10","author":"J Agnese","year":"2020","unstructured":"Agnese J, Herrera J, Tao H, Zhu X. A survey and taxonomy of adversarial neural networks for text-to-image synthesis. Wiley Interdiscip Rev Data Mining Knowl Discov. 2020;10(4):1345.","journal-title":"Wiley Interdiscip Rev Data Mining Knowl Discov"},{"issue":"4","key":"3289_CR13","first-page":"1","volume":"11","author":"M Vuppuluri","year":"2017","unstructured":"Vuppuluri M, Dash A. Survey on generative adversarial networks. Int J Eng Res Comput Sci Eng. 2017;11(4):1\u201349.","journal-title":"Int J Eng Res Comput Sci Eng"},{"key":"3289_CR14","doi-asserted-by":"publisher","first-page":"3313","DOI":"10.1109\/TKDE.2021.3130191","volume":"35","author":"J Gui","year":"2021","unstructured":"Gui J, Sun Z, Wen Y, Tao D, Ye J. 
A review on generative adversarial networks: Algorithms, theory, and applications. IEEE Trans Knowl Data Eng. 2021;35:3313\u201332.","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"3289_CR15","unstructured":"Wang Z, She Q, Ward T.E. Generative adversarial networks: a survey and taxonomy. arXiv preprint arXiv:1906.01529. 2019"},{"issue":"3","key":"3289_CR16","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3446374","volume":"54","author":"D Saxena","year":"2021","unstructured":"Saxena D, Cao J. Generative adversarial networks (gans) challenges, solutions, and future directions. ACM Comput Surveys (CSUR). 2021;54(3):1\u201342.","journal-title":"ACM Comput Surveys (CSUR)"},{"key":"3289_CR17","unstructured":"Huang H, Yu PS, Wang C. An introduction to image synthesis with generative adversarial nets. arXiv preprint arXiv:1803.04469. 2018"},{"key":"3289_CR18","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1155\/2020\/6748430","volume":"2020","author":"L Jin","year":"2020","unstructured":"Jin L, Tan F, Jiang S. Generative adversarial network technologies and applications in computer vision. Comput Intell Neurosci. 2020;2020:1\u201317.","journal-title":"Comput Intell Neurosci"},{"issue":"37\u201338","key":"3289_CR19","first-page":"27407","volume":"79","author":"S Akanksha","year":"2020","unstructured":"Akanksha S, Neeru J, Rana P. Potential of generative adversarial net algorithms in image and video processing applications\u2014a survey. Multimedia Tools Appl. 2020;79(37\u201338):27407\u201337.","journal-title":"Multimedia Tools Appl"},{"key":"3289_CR20","unstructured":"Arjovsky M, Chintala S, Bottou L. Wasserstein generative adversarial networks. In: International Conference on Machine Learning, PMLR. 2017. pp. 214\u2013223"},{"key":"3289_CR21","doi-asserted-by":"crossref","unstructured":"Mao X, Li Q, Xie H, Lau RY, Wang Z, Paul Smolley S. Least squares generative adversarial networks. 
In: Proceedings of the IEEE international conference on computer vision. 2017. pp. 2794\u20132802","DOI":"10.1109\/ICCV.2017.304"},{"issue":"5","key":"3289_CR22","doi-asserted-by":"publisher","first-page":"1118","DOI":"10.1007\/s11263-019-01265-2","volume":"128","author":"G-J Qi","year":"2020","unstructured":"Qi G-J. Loss-sensitive generative adversarial networks on lipschitz densities. Int J Comput Vision. 2020;128(5):1118\u201340.","journal-title":"Int J Comput Vision"},{"key":"3289_CR23","unstructured":"Metz L, Poole B, Pfau D, Sohl-Dickstein J. Unrolled generative adversarial networks. arXiv preprint arXiv:1611.02163. 2016"},{"key":"3289_CR24","unstructured":"Che T, Li Y, Jacob AP, Bengio Y, Li W. Mode regularized generative adversarial networks. arXiv preprint arXiv:1612.02136. 2016"},{"key":"3289_CR25","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1606.03498","author":"T Salimans","year":"2016","unstructured":"Salimans T, Goodfellow I, Zaremba W, Cheung V, Radford A, Chen X. Improved techniques for training gans. Adv Neural Inf Process Syst. 2016. https:\/\/doi.org\/10.48550\/arXiv.1606.03498.","journal-title":"Adv Neural Inf Process Syst"},{"key":"3289_CR26","first-page":"6629","volume":"30","author":"M Heusel","year":"2017","unstructured":"Heusel M, Ramsauer H, Unterthiner T, Nessler B, Hochreiter S. Gans trained by a two time-scale update rule converge to a local nash equilibrium. Adv Neural Inf Process Syst. 2017;30:6629\u201340.","journal-title":"Adv Neural Inf Process Syst"},{"key":"3289_CR27","unstructured":"Mirza M, Osindero S. Conditional generative adversarial nets. arXiv preprint arXiv:1411.1784. 2014"},{"issue":"2","key":"3289_CR28","doi-asserted-by":"publisher","first-page":"525","DOI":"10.1007\/s11831-019-09388-y","volume":"28","author":"H Alqahtani","year":"2021","unstructured":"Alqahtani H, Kavakli-Thorne M, Kumar G. Applications of generative adversarial networks (gans): an updated review. Arch Comput Methods Eng. 
2021;28(2):525\u201352.","journal-title":"Arch Comput Methods Eng"},{"key":"3289_CR29","doi-asserted-by":"publisher","first-page":"36322","DOI":"10.1109\/ACCESS.2019.2905015","volume":"7","author":"Z Pan","year":"2019","unstructured":"Pan Z, Yu W, Yi X, Khan A, Yuan F, Zheng Y. Recent progress on generative adversarial networks (gans): a survey. IEEE Access. 2019;7:36322\u201333.","journal-title":"IEEE Access"},{"issue":"6","key":"3289_CR30","doi-asserted-by":"publisher","DOI":"10.1016\/j.patter.2020.100089","volume":"1","author":"ME Tschuchnig","year":"2020","unstructured":"Tschuchnig ME, Oostingh GJ, Gadermayr M. Generative adversarial networks in digital pathology: a survey on trends and future potential. Patterns. 2020;1(6): 100089.","journal-title":"Patterns"},{"issue":"2","key":"3289_CR31","doi-asserted-by":"publisher","first-page":"433","DOI":"10.1080\/02564602.2020.1854058","volume":"39","author":"A Arora","year":"2022","unstructured":"Arora A. Shantanu: a review on application of gans in the cybersecurity domain. IETE Tech Rev. 2022;39(2):433\u201341.","journal-title":"IETE Tech Rev"},{"key":"3289_CR32","unstructured":"Zhang H, Goodfellow I, Metaxas D, Odena A. Self-attention generative adversarial networks. In: International conference on machine learning, PMLR. 2019. pp. 7354\u20137363"},{"key":"3289_CR33","doi-asserted-by":"crossref","unstructured":"Zhu J-Y, Park T, Isola P, Efros AA. Unpaired image-to-image translation using cycle-consistent adversarial networks. In: Proceedings of the IEEE international conference on computer vision. 2017. pp. 2223\u20132232","DOI":"10.1109\/ICCV.2017.244"},{"key":"3289_CR34","doi-asserted-by":"crossref","unstructured":"Isola P, Zhu J-Y, Zhou T, Efros AA. Image-to-image translation with conditional adversarial networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition. 2017. pp. 
1125\u20131134","DOI":"10.1109\/CVPR.2017.632"},{"key":"3289_CR35","unstructured":"Kim T, Cha M, Kim H, Lee JK, Kim J. Learning to discover cross- domain relations with generative adversarial networks. In: International conference on machine learning, PMLR 2017. pp. 1857\u20131865"},{"key":"3289_CR36","doi-asserted-by":"crossref","unstructured":"Yi Z, Zhang H, Tan P, Gong M. Dualgan: unsupervised dual learning for image-to-image translation. In: Proceedings of the IEEE international conference on computer vision. 2017. pp. 2849\u20132857","DOI":"10.1109\/ICCV.2017.310"},{"key":"3289_CR37","doi-asserted-by":"crossref","unstructured":"Yu J, Lin Z, Yang J, Shen X, Lu X, Huang TS. Generative image inpainting with contextual attention. In: Proceedings of the IEEE conference on computer vision and pattern recognition. 2018. pp. 5505\u20135514","DOI":"10.1109\/CVPR.2018.00577"},{"key":"3289_CR38","unstructured":"Nazeri K, Ng E, Joseph T, Qureshi FZ, Ebrahimi M. Edgecon- nect: Generative image inpainting with adversarial edge learning. arXiv preprint arXiv:1901.00212. 2019"},{"key":"3289_CR39","doi-asserted-by":"crossref","unstructured":"Ledig C, Theis L, Husz\u00b4ar F, Caballero J, Cunningham A, Acosta A, Aitken A, Tejani A, Totz J, Wang Z, et al. Photo-realistic single image super-resolution using a generative adversarial network. In: Proceedings of the IEEE conference on computer vision and pattern recognition. 2017. pp. 4681\u20134690","DOI":"10.1109\/CVPR.2017.19"},{"key":"3289_CR40","doi-asserted-by":"crossref","unstructured":"Li W, Zhou K, Qi L, Lu L, Jiang N, Lu J, Jia J. Best- buddy gans for highly detailed image super-resolution. arXiv preprint arXiv:2103.15295. 2021","DOI":"10.1609\/aaai.v36i2.20030"},{"key":"3289_CR41","doi-asserted-by":"crossref","unstructured":"Wang X, Yu K, Wu S, Gu J, Liu Y, Dong C, Qiao Y, Change Loy C. Esrgan: enhanced super-resolution generative adversarial networks. 
In: Proceedings of the European conference on computer vision (ECCV) workshops. 2018","DOI":"10.1007\/978-3-030-11021-5_5"},{"key":"3289_CR42","unstructured":"Guan J, Pan C, Li S, Yu D. Srdgan: learning the noise prior for super resolution with dual generative adversarial networks. arXiv preprint arXiv:1903.11821. 2019"},{"key":"3289_CR43","unstructured":"Mathieu M, Couprie C, LeCun Y. Deep multi-scale video prediction beyond mean square error. arXiv preprint arXiv:1511.05440. 2015"},{"key":"3289_CR44","doi-asserted-by":"crossref","unstructured":"Lu C, Hirsch M, Scholkopf B. Flexible spatio-temporal networks for video prediction. In: Proceedings of the IEEE conference on computer vision and pattern recognition. 2017. pp. 6523\u20136531","DOI":"10.1109\/CVPR.2017.230"},{"key":"3289_CR45","doi-asserted-by":"crossref","unstructured":"Vondrick C, Torralba A. Generating the future with adversarial trans- formers. In: Proceedings of the IEEE conference on computer vision and pattern recognition. 2017. pp. 1020\u20131028","DOI":"10.1109\/CVPR.2017.319"},{"key":"3289_CR46","doi-asserted-by":"publisher","first-page":"114021","DOI":"10.1109\/ACCESS.2019.2933910","volume":"7","author":"J Liu","year":"2019","unstructured":"Liu J, Li W, Pei H, Wang Y, Qu F, Qu Y, Chen Y. Identity preserving generative adversarial network for cross-domain person re- identification. IEEE Access. 2019;7:114021\u201332.","journal-title":"IEEE Access"},{"key":"3289_CR47","doi-asserted-by":"crossref","unstructured":"Li J, Liang X, Wei Y, Xu T, Feng J, Yan S. Perceptual generative adversarial networks for small object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition. 2017. pp. 1222\u20131230","DOI":"10.1109\/CVPR.2017.211"},{"key":"3289_CR48","doi-asserted-by":"crossref","unstructured":"Antipov G, Baccouche M, Dugelay J-L. Face aging with conditional generative adversarial networks. In: 2017 IEEE international conference on image processing (ICIP), IEEE. 
2017. pp. 2089\u20132093","DOI":"10.1109\/ICIP.2017.8296650"},{"key":"3289_CR49","doi-asserted-by":"crossref","unstructured":"Fabbri M, Calderara S, Cucchiara R. Generative adversarial models for people attribute recognition in surveillance. In: 2017 14th IEEE international conference on advanced video and signal based surveillance (AVSS), IEEE. 2017. pp. 1\u20136","DOI":"10.1109\/AVSS.2017.8078521"},{"key":"3289_CR50","doi-asserted-by":"crossref","unstructured":"Dong H-W, Hsiao, W-Y, Yang L-C, Yang Y-H. Musegan: Multi- track sequential generative adversarial networks for symbolic music generation and accompaniment. In: Proceedings of the AAAI conference on artificial intelligence, vol. 32. 2018","DOI":"10.1609\/aaai.v32i1.11312"},{"key":"3289_CR51","unstructured":"Yang L-C, Chou S-Y, Yang Y-H. Midinet: a convolutional generative adversarial network for symbolic-domain music generation. arXiv preprint arXiv:1703.10847 (2017)"},{"key":"3289_CR52","doi-asserted-by":"crossref","unstructured":"Pascual S, Bonafonte A. Serra J. Segan: speech enhancement generative adversarial network. arXiv preprint arXiv:1703.09452. 2017","DOI":"10.21437\/Interspeech.2017-1428"},{"key":"3289_CR53","doi-asserted-by":"crossref","unstructured":"Higuchi T, Kinoshita K, Delcroix M, Nakatani T. Adversarial train- ing for data-driven speech enhancement without parallel corpus. In: 2017 IEEE automatic speech recognition and understanding workshop (ASRU), IEEE. 2017. pp. 40\u201347","DOI":"10.1109\/ASRU.2017.8268914"},{"issue":"1","key":"3289_CR54","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s13638-018-1196-0","volume":"2018","author":"S Qin","year":"2018","unstructured":"Qin S, Jiang T. Improved wasserstein conditional generative adversarial network speech enhancement. EURASIP J Wirel Commun Netw. 
2018;2018(1):1\u201310.","journal-title":"EURASIP J Wirel Commun Netw"},{"issue":"11","key":"3289_CR55","doi-asserted-by":"publisher","first-page":"5005","DOI":"10.1007\/s00034-018-0798-4","volume":"37","author":"Z-X Li","year":"2018","unstructured":"Li Z-X, Dai L-R, Song Y, McLoughlin I. A conditional generative model for speech enhancement. Circuits Syst Signal Process. 2018;37(11):5005\u201322.","journal-title":"Circuits Syst Signal Process"},{"key":"3289_CR56","unstructured":"Reed S, Akata Z, Yan X, Logeswaran L, Schiele B, Lee H. Generative adversarial text to image synthesis. In: International Conference on Machine Learning, PMLR. 2016. pp. 1060\u20131069"},{"key":"3289_CR57","doi-asserted-by":"crossref","unstructured":"Zhang H, Xu T, Li H, Zhang S, Wang X, Huang X, Metaxas DN. Stackgan: text to photo-realistic image synthesis with stacked generative adversarial networks. In: Proceedings of the IEEE international conference on computer vision. 2017. pp. 5907\u20135915","DOI":"10.1109\/ICCV.2017.629"},{"key":"3289_CR58","doi-asserted-by":"crossref","unstructured":"Shetty R, Rohrbach M, Anne Hendricks L, Fritz M, Schiele B. Speaking the same language: Matching machine to human captions by adversarial training. In: Proceedings of the IEEE international conference on computer vision. 2017. pp. 4135\u20134144","DOI":"10.1109\/ICCV.2017.445"},{"key":"3289_CR59","doi-asserted-by":"crossref","unstructured":"Dai B, Fidler S, Urtasun R, Lin D. Towards diverse and natural image descriptions via a conditional gan. In: Proceedings of the IEEE international conference on computer vision. 2017. pp. 2970\u20132979","DOI":"10.1109\/ICCV.2017.323"},{"key":"3289_CR60","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1016\/j.neucom.2019.12.073","volume":"387","author":"Y Wei","year":"2020","unstructured":"Wei Y, Wang L, Cao H, Shao M, Wu C. Multi-attention generative adversarial network for image captioning. Neurocomputing. 
2020;387:91\u20139.","journal-title":"Neurocomputing"},{"key":"3289_CR61","doi-asserted-by":"crossref","unstructured":"Pan Y, Qiu Z, Yao T, Li H, Mei T. To create what you tell: Generating videos from captions. In: Proceedings of the 25th ACM international conference on multimedia. 2017. pp. 1789\u20131798","DOI":"10.1145\/3123266.3127905"},{"key":"3289_CR62","doi-asserted-by":"crossref","unstructured":"Balaji Y, Min MR, Bai B, Chellappa R, Graf HP Conditional gan with discriminative filter generation for text-to-video synthesis. In: IJCAI. 2019 vol. 1. pp. 2","DOI":"10.24963\/ijcai.2019\/276"},{"key":"3289_CR63","doi-asserted-by":"crossref","unstructured":"Park JS, Rohrbach M, Darrell T, Rohrbach A. Adversarial inference for multi-sentence video description. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 2019. pp. 6598\u20136608","DOI":"10.1109\/CVPR.2019.00676"},{"key":"3289_CR64","doi-asserted-by":"crossref","unstructured":"Mahdizadehaghdam S, Panahi A, Krim H. Sparse generative adversarial network. In: Proceedings of the IEEE\/CVF international conference on computer vision Workshops. 2019","DOI":"10.1109\/ICCVW.2019.00369"},{"key":"3289_CR65","unstructured":"Radford A, Metz L, Chintala S. Unsupervised representation learning with deep convolutional generative adversarial networks. arXiv preprint arXiv:1511.06434. 2015"},{"key":"3289_CR66","first-page":"217","volume":"29","author":"SE Reed","year":"2016","unstructured":"Reed SE, Akata Z, Mohan S, Tenka S, Schiele B, Lee H. Learning what and where to draw. Adv Neural Inf Process Syst. 2016;29:217\u201325.","journal-title":"Adv Neural Inf Process Syst"},{"key":"3289_CR67","doi-asserted-by":"crossref","unstructured":"Dong H, Zhang J, McIlwraith D, Guo Y. I2t2i: learning text to image synthesis with textual data augmentation. In: 2017 IEEE international conference on image processing (ICIP), IEEE. 2017. pp. 
2015\u20132019","DOI":"10.1109\/ICIP.2017.8296635"},{"issue":"8","key":"3289_CR68","doi-asserted-by":"publisher","first-page":"1947","DOI":"10.1109\/TPAMI.2018.2856256","volume":"41","author":"H Zhang","year":"2018","unstructured":"Zhang H, Xu T, Li H, Zhang S, Wang X, Huang X, Metaxas DN. Stackgan++: realistic image synthesis with stacked generative adversarial networks. IEEE Trans Pattern Anal Mach Intell. 2018;41(8):1947\u201362.","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"3289_CR69","doi-asserted-by":"crossref","unstructured":"Xu T, Zhang P, Huang Q, Zhang H, Gan Z, Huang X, He X. Attngan: Fine-grained text to image generation with attentional generative adversarial networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition. 2018. pp. 1316\u20131324","DOI":"10.1109\/CVPR.2018.00143"},{"key":"3289_CR70","doi-asserted-by":"crossref","unstructured":"Zhang Z, Xie Y, Yang L. Photographic text-to-image synthesis with a hierarchically-nested adversarial network. In: Proceedings of the IEEE conference on computer vision and pattern recognition. 2018. pp. 6199\u20136208","DOI":"10.1109\/CVPR.2018.00649"},{"key":"3289_CR71","doi-asserted-by":"crossref","unstructured":"Sun J, Zhou Y, Zhang B. Resfpa-gan: Text-to-image synthesis with generative adversarial network based on residual block feature pyramid attention. In: 2019 IEEE International conference on advanced robotics and its social impacts (ARSO), IEEE. 2019. pp. 317\u2013322","DOI":"10.1109\/ARSO46408.2019.8948717"},{"key":"3289_CR72","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2020.107384","volume":"110","author":"L Gao","year":"2021","unstructured":"Gao L, Chen D, Zhao Z, Shao J, Shen HT. Lightweight dynamic conditional gan with pyramid attention for text-to-image synthesis. Pattern Recogn. 2021;110: 107384.","journal-title":"Pattern Recogn"},{"key":"3289_CR73","doi-asserted-by":"crossref","unstructured":"Schulze H, Yaman D, Waibel A. 
Cagan: Text-to-image generation with combined attention gans. arXiv preprint arXiv:2104.12663. 2021","DOI":"10.1007\/978-3-030-92659-5_25"},{"key":"3289_CR74","unstructured":"Dash A, Gamboa JCB, Ahmed S, Liwicki M, Afzal MZ. Tac- gan-text conditioned auxiliary classifier generative adversarial network. arXiv preprint arXiv:1703.06412. 2017"},{"issue":"5","key":"3289_CR75","doi-asserted-by":"publisher","first-page":"7789","DOI":"10.1007\/s11042-020-09965-5","volume":"80","author":"M Zhang","year":"2021","unstructured":"Zhang M, Li C, Zhou Z. Text to image synthesis using multi- generator text conditioned generative adversarial networks. Multimed Tools Appl. 2021;80(5):7789\u2013803.","journal-title":"Multimed Tools Appl"},{"key":"3289_CR76","doi-asserted-by":"crossref","unstructured":"Qiao T, Zhang J, Xu D, Tao D. Mirrorgan: Learning text-to-image generation by redescription. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 2019. pp. 1505\u20131514","DOI":"10.1109\/CVPR.2019.00160"},{"key":"3289_CR77","doi-asserted-by":"publisher","DOI":"10.1016\/j.dsp.2020.102866","volume":"107","author":"Q Cheng","year":"2020","unstructured":"Cheng Q, Gu X. Cross-modal feature alignment based hybrid attentional generative adversarial networks for text-to-image synthesis. Digi- tal Signal Processing. 2020;107: 102866.","journal-title":"Digi- tal Signal Processing"},{"key":"3289_CR78","unstructured":"Tao M, Tang H, Wu S, Sebe N, Jing X-Y, Wu F, Bao B. Df-gan: deep fusion generative adversarial networks for text-to-image synthesis. arXiv preprint arXiv:2008.05865. 2020"},{"key":"3289_CR79","doi-asserted-by":"crossref","unstructured":"Liu B, Song K, Zhu Y, de Melo G, Elgammal A. Time: text and image mutual-translation adversarial networks. In: Proceedings of the AAAI conference on artificial intelligence. 2021. vol. 35, pp. 
2082\u20132090","DOI":"10.1609\/aaai.v35i3.16305"},{"key":"3289_CR80","doi-asserted-by":"crossref","unstructured":"Liao W, Hu K, Yang MY, Rosenhahn B. Text to image generation with semantic-spatial aware gan. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 2022. pp. 18187\u201318196","DOI":"10.1109\/CVPR52688.2022.01765"},{"key":"3289_CR81","doi-asserted-by":"crossref","unstructured":"Jeon E, Kim K, Kim D. Fa-gan: feature-aware gan for text to image synthesis. In: 2021 IEEE international conference on image processing (ICIP), IEEE. 2021. pp. 2443\u20132447","DOI":"10.1109\/ICIP42928.2021.9506172"},{"key":"3289_CR82","doi-asserted-by":"crossref","unstructured":"Eghbal-zadeh H, Fischer L, Hoch T. On conditioning gans to hierarchical ontologies. In: International conference on database and expert systems Applications, Springer. 2019. pp. 182\u2013186","DOI":"10.1007\/978-3-030-27684-3_23"},{"key":"3289_CR83","doi-asserted-by":"publisher","first-page":"22","DOI":"10.1016\/j.patrec.2020.02.030","volume":"135","author":"KE Ak","year":"2020","unstructured":"Ak KE, Lim JH, Tham JY, Kassim AA. Semantically consistent text to fashion image synthesis with an enhanced attentional generative adversarial network. Pattern Recogn Lett. 2020;135:22\u20139.","journal-title":"Pattern Recogn Lett"},{"key":"3289_CR84","doi-asserted-by":"crossref","unstructured":"Zhang Y, Briq R, Tanke J, Gall J. Adversarial synthesis of human pose from text. In: DAGM German conference on pattern recognition, Springer. 2020. pp. 145\u2013158","DOI":"10.1007\/978-3-030-71278-5_11"},{"key":"3289_CR85","doi-asserted-by":"publisher","DOI":"10.1155\/2021\/5573751","author":"R Ma","year":"2021","unstructured":"Ma R, Lou J. Cpgan: an efficient architecture designing for text- to-image generative adversarial networks based on canonical polyadic decomposition. Sci Program. 2021. 
https:\/\/doi.org\/10.1155\/2021\/5573751.","journal-title":"Sci Program"},{"key":"3289_CR86","unstructured":"Karnewar, A.: T2F: text to face generation using Deep Learning. https:\/\/github.com\/akanimax\/T2F"},{"key":"3289_CR87","unstructured":"Karras T, Aila T, Laine S, Lehtinen J. Progressive growing of gans for improved quality, stability, and variation. arXiv preprint arXiv:1710.10196. 2017"},{"key":"3289_CR88","unstructured":"Chen X, Qing L, He X, Luo X, Xu Y. Ftgan: a fully-trained generative adversarial network for text to face generation. arXiv preprint arXiv:1904.05729. 2019"},{"key":"3289_CR89","doi-asserted-by":"crossref","unstructured":"Khan, M.Z., Jabeen, S., Khan, M.U.G., Saba, T., Rehmat, A., Rehman, A., Tariq, U.: A realistic image generation of face from text description using the fully trained generative adversarial networks. IEEE Access 9, 1250\u20131260 (2020)","DOI":"10.1109\/ACCESS.2020.3015656"},{"key":"3289_CR90","doi-asserted-by":"crossref","unstructured":"Ayanthi D, Munasinghe S. Text-to-face generation with stylegan2. arXiv preprint arXiv:2205.12512. 2022","DOI":"10.5121\/csit.2022.120805"},{"key":"3289_CR91","unstructured":"Wang T, Zhang T, Lovell B. Faces a la carte: text-to-face generation via attribute disentanglement. In: Proceedings of the IEEE\/CVF winter conference on applications of computer vision. 202). pp. 3380\u20133388"},{"key":"3289_CR92","doi-asserted-by":"crossref","unstructured":"Sun J, Deng Q, Li Q, Sun M, Ren M, Sun Z. Anyface: free- style text-to-face synthesis and manipulation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 2022. pp. 18687\u201318696","DOI":"10.1109\/CVPR52688.2022.01813"},{"key":"3289_CR93","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1706.03762","author":"A Vaswani","year":"2017","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser L, Polosukhin I. Attention is all you need. Adv Neural Inf Process Syst. 2017. 
https:\/\/doi.org\/10.48550\/arXiv.1706.03762.","journal-title":"Adv Neural Inf Process Syst"},{"key":"3289_CR94","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Zhai X, Unterthiner T, Dehghani M, Minderer M, Heigold G, Gelly S, et al. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929. 2020"},{"key":"3289_CR95","doi-asserted-by":"crossref","unstructured":"Yang F, Yang H, Fu J, Lu H, Guo B. Learning texture transformer network for image super-resolution. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 2020. pp. 5791\u20135800","DOI":"10.1109\/CVPR42600.2020.00583"},{"key":"3289_CR96","doi-asserted-by":"crossref","unstructured":"Chen H, Wang Y, Guo T, Xu C, Deng Y, Liu Z, Ma S, Xu C, Xu C, Gao W. Pre-trained image processing transformer. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 2021. pp. 12299\u201312310","DOI":"10.1109\/CVPR46437.2021.01212"},{"key":"3289_CR97","unstructured":"Touvron H, Cord M, Douze M, Massa F, Sablayrolles A, J\u00b4egou H. Training data-efficient image transformers & distillation through attention. In: International conference on machine learning, PMLR. (2021). pp. 10347\u201310357"},{"key":"3289_CR98","unstructured":"Zhu X, Su W, Lu L, Li B, Wang X, Dai J. Deformable detr: deformable transformers for end-to-end object detection. arXiv preprint arXiv:2010.04159. 2020"},{"key":"3289_CR99","unstructured":"Ramesh A, Pavlov M, Goh G, Gray S, Voss C, Radford A, Chen M, Sutskever I. Zero-shot text-to-image generation. In: International conference on machine learning, PMLR. 2021. pp. 8821\u20138831"},{"key":"3289_CR100","first-page":"19822","volume":"34","author":"M Ding","year":"2021","unstructured":"Ding M, Yang Z, Hong W, Zheng W, Zhou C, Yin D, Lin J, Zou X, Shao Z, Yang H, et al. Cogview: Mastering text-to-image generation via transformers. Adv Neural Inf Process Syst. 
2021;34:19822\u201335.","journal-title":"Adv Neural Inf Process Syst"},{"key":"3289_CR101","doi-asserted-by":"crossref","unstructured":"Gu S, Chen D, Bao J, Wen F, Zhang B, Chen D, Yuan L, Guo B. Vector quantized diffusion model for text-to-image synthesis. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. pp. 10696\u201310706 (2022)","DOI":"10.1109\/CVPR52688.2022.01043"},{"issue":"3","key":"3289_CR102","doi-asserted-by":"publisher","first-page":"1567","DOI":"10.1109\/TNSE.2022.3147787","volume":"9","author":"W Li","year":"2022","unstructured":"Li W, Wen S, Shi K, Yang Y, Huang T. Neural architecture search with a lightweight transformer for text-to-image synthesis. IEEE Trans Netw Sci Eng. 2022;9(3):1567\u201376.","journal-title":"IEEE Trans Netw Sci Eng"},{"key":"3289_CR103","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s11063-022-10866-x","volume":"54","author":"J Zhu","year":"2022","unstructured":"Zhu J, Li Z, Wei J, Ma H. Pbgn: phased bidirectional generation network in text-to-image synthesis. Neural Process Lett. 2022;54:1\u201321.","journal-title":"Neural Process Lett"},{"key":"3289_CR104","doi-asserted-by":"crossref","unstructured":"Wu F, Liu L, Hao F, He F, Cheng J. Text-to-image synthesis based on object-guided joint-decoding transformer. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 18113\u201318122 (2022)","DOI":"10.1109\/CVPR52688.2022.01758"},{"key":"3289_CR105","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1609.02612","author":"C Vondrick","year":"2016","unstructured":"Vondrick C, Pirsiavash H, Torralba A. Generating videos with scene dynamics. Adv Neural Inf Process Syst. 2016. https:\/\/doi.org\/10.48550\/arXiv.1609.02612.","journal-title":"Adv Neural Inf Process Syst"},{"key":"3289_CR106","doi-asserted-by":"crossref","unstructured":"Saito, M., Matsumoto, E., Saito, S.: Temporal generative adversarial nets with singular value clipping. 
In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2830\u20132839 (2017)","DOI":"10.1109\/ICCV.2017.308"},{"key":"3289_CR107","doi-asserted-by":"crossref","unstructured":"Tulyakov S, Liu M-Y, Yang X, Kautz J. Mocogan: Decomposing motion and content for video generation. In: Proceedings of the IEEE conference on computer vision and pattern recognition. 2018. pp. 1526\u20131535","DOI":"10.1109\/CVPR.2018.00165"},{"key":"3289_CR108","doi-asserted-by":"crossref","unstructured":"Saito, M., Saito, S., Koyama, M., Kobayashi, S.: Train sparsely, generate densely: Memory-efficient unsupervised training of high-resolution temporal gan. International Journal of Computer Vision 128(10), 2586\u20132606 (2020)","DOI":"10.1007\/s11263-020-01333-y"},{"key":"3289_CR109","unstructured":"Clark, A., Donahue, J., Simonyan, K.: Adversarial video generation on complex datasets. arXiv preprint arXiv:1907.06571 (2019)"},{"key":"3289_CR110","doi-asserted-by":"crossref","unstructured":"Ohnishi, K., Yamamoto, S., Ushiku, Y., Harada, T.: Hierarchical video generation from orthogonal information: Optical flow and texture. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 32 (2018)","DOI":"10.1609\/aaai.v32i1.11881"},{"key":"3289_CR111","doi-asserted-by":"crossref","unstructured":"Nakahira, Y., Kawamoto, K.: Dcvgan: Depth conditional video generation. In: 2019 IEEE International Conference on Image Processing (ICIP), pp. 749\u2013753 (2019). IEEE","DOI":"10.1109\/ICIP.2019.8803764"},{"key":"3289_CR112","unstructured":"Acharya, D., Huang, Z., Paudel, D.P., Van Gool, L.: Towards high resolution video generation with progressive growing of sliced wasserstein gans. arXiv preprint arXiv:1810.02419 (2018)"},{"key":"3289_CR113","doi-asserted-by":"crossref","unstructured":"Munoz, A., Zolfaghari, M., Argus, M., Brox, T.: Temporal shift gan for large scale video generation. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 
3179\u20133188 (2021)","DOI":"10.1109\/WACV48630.2021.00322"},{"key":"3289_CR114","unstructured":"Tian, Y., Ren, J., Chai, M., Olszewski, K., Peng, X., Metaxas, D.N., Tulyakov, S.: A good image generator is what you need for high-resolution video synthesis. arXiv preprint arXiv:2104.15069 (2021)"},{"key":"3289_CR115","doi-asserted-by":"crossref","unstructured":"Hong, K., Uh, Y., Byun, H.: Arrowgan: Learning to generate videos by learning arrow of time. Neurocomputing 438, 223\u2013234 (2021)","DOI":"10.1016\/j.neucom.2021.01.043"},{"key":"3289_CR116","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12233","author":"Y Li","year":"2018","unstructured":"Li Y, Min M, Shen D, Carlson D, Carin L. Video generation from text. Proc AAAI Conf Artif Intell. 2018. https:\/\/doi.org\/10.1609\/aaai.v32i1.12233.","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"3289_CR117","unstructured":"Kumar R, Sotelo J, Kumar K, de Br\u00e9bisson A, Bengio Y. Obamanet: photo-realistic lip-sync from text. arXiv preprint arXiv:1801.01442. 2017"},{"key":"3289_CR118","unstructured":"Stoll S, Camg\u00f6z NC, Hadfield S, Bowden R. Sign language production using neural machine translation and generative adversarial networks. In: Proceedings of the 29th British machine vision conference (BMVC 2018). University of Surrey. 2018"},{"key":"3289_CR119","doi-asserted-by":"crossref","unstructured":"Deng, K., Fei, T., Huang, X., Peng, Y.: Irc-gan: Introspective recurrent convolutional gan for text-to-video generation. In: IJCAI, pp. 2216\u20132222 (2019)","DOI":"10.24963\/ijcai.2019\/307"},{"key":"3289_CR120","doi-asserted-by":"crossref","unstructured":"Li Y, Gan Z, Shen Y, Liu J, Cheng Y, Wu Y, Carin L, Carlson D, Gao J. Storygan: a sequential conditional gan for story visualization. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 2019. pp. 
6329\u20136338","DOI":"10.1109\/CVPR.2019.00649"},{"key":"3289_CR121","doi-asserted-by":"crossref","unstructured":"Yu H, Huang Y, Pi L, Wang L. Recurrent deconvolutional generative adversarial networks with application to text guided video generation. arXiv preprint arXiv:2008.05856. 2020","DOI":"10.1007\/978-3-030-31723-2_2"},{"key":"3289_CR122","doi-asserted-by":"publisher","first-page":"153113","DOI":"10.1109\/ACCESS.2020.3017881","volume":"8","author":"D Kim","year":"2020","unstructured":"Kim D, Joo D, Kim J. Tivgan: text to image to video generation with step-by-step evolutionary generator. IEEE Access. 2020;8:153113\u201322.","journal-title":"IEEE Access"},{"key":"3289_CR123","doi-asserted-by":"publisher","first-page":"7454","DOI":"10.1109\/TIP.2020.3003227","volume":"29","author":"Q Chen","year":"2020","unstructured":"Chen Q, Wu Q, Chen J, Wu Q, van den Hengel A, Tan M. Scripted video generation with a bottom-up generative adversarial network. IEEE Trans Image Process. 2020;29:7454\u201367.","journal-title":"IEEE Trans Image Process"},{"key":"3289_CR124","unstructured":"Sotelo J, Mehri S, Kumar K, Santos JF, Kastner K, Courville A, Bengio Y. Char2wav: End-to-end speech synthesis. 2017"},{"key":"3289_CR125","unstructured":"Sohl-Dickstein J, Weiss E, Maheswaranathan N, Ganguli S. Deep unsupervised learning using nonequilibrium thermodynamics. In: International conference on machine learning, PMLR. 2015. pp. 2256\u20132265"},{"key":"3289_CR126","doi-asserted-by":"crossref","unstructured":"He Z, Sun T, Wang K, Huang X, Qiu X. Diffusionbert: Improving generative masked language models with diffusion models. arXiv preprint arXiv:2211.15029. 2022","DOI":"10.18653\/v1\/2023.acl-long.248"},{"key":"3289_CR127","unstructured":"Popov V, Vovk I, Gogoryan V, Sadekova T, Kudinov M. Gradtts: a diffusion probabilistic model for text-to-speech. In: International conference on machine learning, PMLR. 2021. pp. 
8599\u20138608"},{"key":"3289_CR128","doi-asserted-by":"crossref","unstructured":"Rombach R, Blattmann A, Lorenz D, Esser P, Ommer B. High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 2022. pp. 10684\u201310695","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"3289_CR129","first-page":"8633","volume":"35","author":"J Ho","year":"2022","unstructured":"Ho J, Salimans T, Gritsenko A, Chan W, Norouzi M, Fleet DJ. Video diffusion models. Adv Neural Inf Process Syst. 2022;35:8633\u201346.","journal-title":"Adv Neural Inf Process Syst"},{"key":"3289_CR130","unstructured":"Ramesh A, Dhariwal P, Nichol A, Chu C, Chen M. Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125. 2022. 1 (2), 3"},{"key":"3289_CR131","first-page":"36479","volume":"35","author":"C Saharia","year":"2022","unstructured":"Saharia C, Chan W, Saxena S, Li L, Whang J, Denton EL, Ghasemipour K, Gontijo Lopes R, Karagol Ayan B, Salimans T, et al. Photorealistic text-to-image diffusion models with deep language understanding. Adv Neural Inf Process Syst. 2022;35:36479\u201394.","journal-title":"Adv Neural Inf Process Syst"},{"key":"3289_CR132","unstructured":"Nichol AQ, Dhariwal P. Improved denoising diffusion probabilistic models. In: International conference on machine learning, PMLR. 2021. pp. 8162\u20138171"},{"key":"3289_CR133","unstructured":"Palash MAH, Nasim MAA, Dhali A, Afrin F. Fine-grained image generation from Bangla text description using attentional generative adversarial network. arXiv preprint arXiv:2109.11749. 
2021"}],"container-title":["SN Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-024-03289-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s42979-024-03289-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42979-024-03289-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,5]],"date-time":"2024-10-05T05:08:49Z","timestamp":1728104929000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s42979-024-03289-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,5]]},"references-count":133,"journal-issue":{"issue":"7","published-online":{"date-parts":[[2024,10]]}},"alternative-id":["3289"],"URL":"https:\/\/doi.org\/10.1007\/s42979-024-03289-z","relation":{},"ISSN":["2661-8907"],"issn-type":[{"value":"2661-8907","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10,5]]},"assertion":[{"value":"19 March 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 June 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 October 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"On behalf of all authors, the corresponding author states that there is no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of Interest"}}],"article-number":"935"}}