{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,1]],"date-time":"2025-05-01T05:40:10Z","timestamp":1746078010372,"version":"3.40.4"},"reference-count":46,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2024,5,15]],"date-time":"2024-05-15T00:00:00Z","timestamp":1715731200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,5,15]],"date-time":"2024-05-15T00:00:00Z","timestamp":1715731200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"Natural Science Foundation of China","doi-asserted-by":"crossref","award":["62176062"],"award-info":[{"award-number":["62176062"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"DOI":"10.1007\/s11042-024-19320-7","type":"journal-article","created":{"date-parts":[[2024,5,15]],"date-time":"2024-05-15T04:05:48Z","timestamp":1715745948000},"page":"10507-10526","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Towards photorealistic face generation using text-guided Semantic-Spatial FaceGAN"],"prefix":"10.1007","volume":"84","author":[{"given":"Qi","family":"Guo","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7096-1830","authenticated-orcid":false,"given":"Xiaodong","family":"Gu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,5,15]]},"reference":[{"key":"19320_CR1","doi-asserted-by":"crossref","unstructured":"Bai Q, Yang C, Xu Y, Liu X, Yang Y, Shen Y (2023) Glead: Improving gans with a generator-leading task. Proceedings of the ieee\/cvf conference on computer vision and pattern recognition, pp 12094\u201312104","DOI":"10.1109\/CVPR52729.2023.01164"},{"key":"19320_CR2","unstructured":"Ben-Yosef M, Weinshall D (2018) Gaussian mixture generative adversarial networks for diverse datasets, and the unsupervised clustering of images. Preprint arXiv:1808.10356"},{"key":"19320_CR3","unstructured":"Brock A, Donahue J, Simonyan K (2019) Large, scale gan training for high fidelity natural image. 7th international conference on learning representations (iclr). New Orleans, LA"},{"key":"19320_CR4","doi-asserted-by":"crossref","unstructured":"Dash A, Ye J, Wang G (2023) A review of generative adversarial networks (gans) and its applications in a wide variety of disciplines: From medical to remote sensing. IEEE Access","DOI":"10.1109\/ACCESS.2023.3346273"},{"key":"19320_CR5","doi-asserted-by":"crossref","unstructured":"Deng Q, Cao J, Liu Y, Chai Z, Li Q, Sun Z (2020) Reference-guided face component editing. Preprint arXiv:2006.02051","DOI":"10.24963\/ijcai.2020\/70"},{"key":"19320_CR6","doi-asserted-by":"crossref","unstructured":"Doan T, Monteiro J, Albuquerque I, Mazoure B, Durand A, Pineau J, Hjelm RD (2019) On-line adaptative curriculum learning for gans. Proceedings of the aaai conference on artificial intelligence, vol 33, pp 3470\u20133477","DOI":"10.1609\/aaai.v33i01.33013470"},{"key":"19320_CR7","doi-asserted-by":"crossref","unstructured":"Du X, Peng J, Zhou Y, Zhang J, Chen S, Jiang G, ... Ji R (2023) Pixelface+: Towards controllable face generation and manipulation with text descriptions and segmentation masks. Proceedings of the 31st acm international conference on multimedia, pp 4666\u20134677","DOI":"10.1145\/3581783.3612067"},{"key":"19320_CR8","unstructured":"Franceschi J-Y, Gartrell M, Dos Santos L, Issenhuth T, de B\u00e9zenac E, Chen M, Rakotomamonjy A (2024) Unifying gans and score-based diffusion as generative particle models. Advances in Neural Information Processing Systems, 36"},{"key":"19320_CR9","unstructured":"Goodfellow I, Pouget-Abadie J, Mirza M, Xu B, Warde-Farley D, Ozair S, ... Bengio Y (2014) Generative adversarial nets. Advances in neural information processing systems, 27"},{"issue":"11","key":"19320_CR10","doi-asserted-by":"publisher","first-page":"5464","DOI":"10.1109\/TIP.2019.2916751","volume":"28","author":"Z He","year":"2019","unstructured":"He Z, Zuo W, Kan M, Shan S, Chen X (2019) Attgan: Facial attribute editing by only changing what you want. IEEE Trans Image Process 28(11):5464\u20135478","journal-title":"IEEE Trans Image Process"},{"key":"19320_CR11","doi-asserted-by":"crossref","unstructured":"Kang M, Zhu J-Y, Zhang R, Park J, Shechtman E, Paris S, Park T (2023) Scaling up gans for text-to-image synthesis. Proceedings of the ieee\/cvf conference on computer vision and pattern recognition, pp 10124\u201310134","DOI":"10.1109\/CVPR52729.2023.00976"},{"key":"19320_CR12","doi-asserted-by":"crossref","unstructured":"Karras T, Laine S, Aila T (2019) A style-based generator architecture for generative adversarial networks. Proceedings of the ieee\/cvf conference on computer vision and pattern recognition, pp 4401\u20134410","DOI":"10.1109\/CVPR.2019.00453"},{"key":"19320_CR13","doi-asserted-by":"crossref","unstructured":"Karras T, Laine S, Aittala M, Hellsten J, Lehtinen J, Aila T (2020) Analyzing and improving the image quality of stylegan. Proceedings of the ieee\/cvf conference on computer vision and pattern recognition, pp 8110\u20138119","DOI":"10.1109\/CVPR42600.2020.00813"},{"key":"19320_CR14","doi-asserted-by":"crossref","unstructured":"Kim M, Liu F, Jain A, Liu X (2023) Dcface: Synthetic face generation with dual condition diffusion model. Proceedings of the ieee\/cvf conference on computer vision and pattern recognition, pp 12715\u201312725","DOI":"10.1109\/CVPR52729.2023.01223"},{"key":"19320_CR15","unstructured":"Kingma DP, Ba J (2014) Adam: A method for stochastic optimization. Preprint arXiv:1412.6980"},{"key":"19320_CR16","doi-asserted-by":"crossref","unstructured":"Koley S, Bhunia AK, Sain A, Chowdhury PN, Xiang T, Song Y-Z (2023) Picture that sketch: Photorealistic image generation from abstract sketches. Proceedings of the ieee\/cvf conference on computer vision and pattern recognition, pp 6850\u20136861","DOI":"10.1109\/CVPR52729.2023.00662"},{"key":"19320_CR17","doi-asserted-by":"crossref","unstructured":"Lee C-H, Liu Z, Wu L, Luo P (2020) Maskgan: Towards diverse and interactive facial image manipulation. Proceedings of the ieee\/cvf conference on computer vision and pattern recognition, pp 5549\u20135558","DOI":"10.1109\/CVPR42600.2020.00559"},{"key":"19320_CR18","unstructured":"Li B, Qi X, Lukasiewicz T, Torr P (2019a) Controllable text-to-image generation. Advances in Neural Information Processing Systems, 32"},{"key":"19320_CR19","unstructured":"Li B, Qi X, Lukasiewicz T, Torr P (2019b) Controllable text-to-image generation. Wallach H, Larochelle H, Beygelzimer A, d\u2019Alch\u00e9-Buc F, Fox E, Garnett R (eds), Advances in neural information processing systems, vol. 32. Curran Associates, Inc. Retrieved from https:\/\/proceedings.neurips.cc\/paper\/2019\/file\/1d72310edc006dadf2190caad5802983-Paper.pdf"},{"key":"19320_CR20","doi-asserted-by":"crossref","unstructured":"Liao W, Hu K, Yang MY, Rosenhahn B (2022) Text to image generation with semantic-spatial aware gan. Proceedings of the ieee\/cvf conference on computer vision and pattern recognition, pp 18187\u201318196","DOI":"10.1109\/CVPR52688.2022.01765"},{"key":"19320_CR21","doi-asserted-by":"publisher","first-page":"500","DOI":"10.1016\/j.cag.2023.07.038","volume":"115","author":"C Liu","year":"2023","unstructured":"Liu C, Hu J, Lin H (2023) Swf-gan: A text-to-image model based on sentence-word fusion perception. Comput Graph 115:500\u2013510","journal-title":"Comput Graph"},{"key":"19320_CR22","doi-asserted-by":"crossref","unstructured":"Liu Y, Li Q, Deng Q, Sun Z, Yang M-H (2023) Gan-based facial attribute manipulation. IEEE Trans Pattern Anal Mach Intell","DOI":"10.1109\/TPAMI.2023.3298868"},{"key":"19320_CR23","doi-asserted-by":"crossref","unstructured":"Liu Y, Li Q, Sun Z (2019) Attribute-aware face aging with wavelet-based generative adversarial networks. Proceedings of the ieee\/cvf conference on computer vision and pattern recognition, pp 11877\u201311886","DOI":"10.1109\/CVPR.2019.01215"},{"key":"19320_CR24","doi-asserted-by":"crossref","unstructured":"Nasir OR, Jha SK, Grover MS, Yu Y, Kumar A, Shah RR (2019) Text2facegan: Face generation from fine grained textual descriptions. 2019 ieee fifth international conference on multimedia big data (bigmm), pp 58\u201367","DOI":"10.1109\/BigMM.2019.00-42"},{"key":"19320_CR25","doi-asserted-by":"crossref","unstructured":"Nguyen V-Q, Suganuma M, Okatani T (2020) Efficient attention mechanism for visual dialog that can handle all the interactions between multiple inputs. European conference on computer vision, pp 223\u2013240","DOI":"10.1007\/978-3-030-58586-0_14"},{"issue":"18","key":"19320_CR26","doi-asserted-by":"publisher","first-page":"e6147","DOI":"10.1002\/cpe.6147","volume":"35","author":"X Ning","year":"2023","unstructured":"Ning X, Nan F, Xu S, Yu L, Zhang L (2023) Multi-view frontal face image generation: a survey. Concurr Comput Pract Exp 35(18):e6147","journal-title":"Concurr Comput Pract Exp"},{"key":"19320_CR27","unstructured":"Oza M, Chanda S, Doermann D (2021) Semantic text-to-face gan-st\u00a0$$\\hat{}$$\u00a0 2fg. Preprint arXiv:2107.10756"},{"key":"19320_CR28","unstructured":"Reed S, Akata Z, Yan X, Logeswaran L, Schiele B, Lee H (2016) Generative adversarial text to image synthesis. International conference on machine learning, pp 1060\u20131069"},{"issue":"3","key":"19320_CR29","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky O, Deng J, Su H, Krause J, Satheesh S, Ma S (2015) Imagenet large scale visual recognition challenge. Int J Comput Vis 115(3):211\u2013252","journal-title":"Int J Comput Vis"},{"key":"19320_CR30","doi-asserted-by":"crossref","unstructured":"Schroff F, Kalenichenko D, Philbin J (2015) Facenet: A unified embedding for face recognition and clustering. Proceedings of the ieee conference on computer vision and pattern recognition, pp 815\u2013823","DOI":"10.1109\/CVPR.2015.7298682"},{"issue":"11","key":"19320_CR31","doi-asserted-by":"publisher","first-page":"2673","DOI":"10.1109\/78.650093","volume":"45","author":"M Schuster","year":"1997","unstructured":"Schuster M, Paliwal KK (1997) Bidirectional recurrent neural networks. IEEE Trans Signal Process 45(11):2673\u20132681","journal-title":"IEEE Trans Signal Process"},{"key":"19320_CR32","unstructured":"Sharma R, Barratt S, Ermon S, Pande V (2018) Improved training with curriculum gans. Preprint arXiv:1807.09295"},{"key":"19320_CR33","doi-asserted-by":"crossref","unstructured":"Song Y, Soleymani M (2019) Polysemous visual-semantic embedding for cross-modal retrieval. Proceedings of the ieee\/cvf conference on computer vision and pattern recognition, pp 1979\u20131988","DOI":"10.1109\/CVPR.2019.00208"},{"key":"19320_CR34","doi-asserted-by":"crossref","unstructured":"Sun J, Deng Q, Li Q, Sun M, Liu Y, Sun Z (2024) Anyface++: A unified framework for free-style text-to-face synthesis and manipulation. IEEE Trans Pattern Anal Mach Intell","DOI":"10.1109\/TPAMI.2023.3345866"},{"key":"19320_CR35","doi-asserted-by":"crossref","unstructured":"Sun J, Deng Q, Li Q, Sun M, Ren M, Sun Z (2022) Anyface: Free-style text-to-face synthesis and manipulation. Proceedings of the ieee\/cvf conference on computer vision and pattern recognition, pp 18687\u201318696","DOI":"10.1109\/CVPR52688.2022.01813"},{"key":"19320_CR36","doi-asserted-by":"crossref","unstructured":"Sun J, Li Q, Wang W, Zhao J, Sun Z (2021) Multi-caption text-to-face synthesis: Dataset and algorithm. Proceedings of the 29th acm international conference on multimedia, pp 2290\u20132298","DOI":"10.1145\/3474085.3475391"},{"key":"19320_CR37","doi-asserted-by":"crossref","unstructured":"Szegedy C, Vanhoucke V, Ioffe S, Shlens J, Wojna Z (2016) Rethinking the inception architecture for computer vision. Proceedings of the ieee conference on computer vision and pattern recognition, pp 2818\u20132826","DOI":"10.1109\/CVPR.2016.308"},{"key":"19320_CR38","unstructured":"Tao M, Tang H, Wu S, Sebe N, Jing X-Y, Wu F, Bao B (2020) Df-gan: Deep fusion generative adversarial networks for text-to-image synthesis. Preprint arXiv:2008.05865"},{"key":"19320_CR39","doi-asserted-by":"publisher","unstructured":"Xia W, Yang Y, Xue J-H, Wu B (2021) Tedigan: Text-guided diverse face image generation and manipulation. 2021 ieee\/cvf conference on computer vision and pattern recognition (cvpr), pp 2256\u20132265. https:\/\/doi.org\/10.1109\/CVPR46437.2021.00229","DOI":"10.1109\/CVPR46437.2021.00229"},{"key":"19320_CR40","doi-asserted-by":"crossref","unstructured":"Xu T, Zhang P, Huang Q, Zhang H, Gan Z, Huang X, He X (2018) Attngan: Fine-grained text to image generation with attentional generative adversarial networks. Proceedings of the ieee conference on computer vision and pattern recognition, pp 1316\u20131324","DOI":"10.1109\/CVPR.2018.00143"},{"issue":"3","key":"19320_CR41","doi-asserted-by":"publisher","first-page":"103667","DOI":"10.1016\/j.ipm.2024.103667","volume":"61","author":"E Yauri-Lozano","year":"2024","unstructured":"Yauri-Lozano E, Castillo-Cara M, Orozco-Barbosa L, Garc\u00eda-Castro R (2024) Generative adversarial networks for text-to-face synthesis & generation: A quantitative-qualitative analysis of natural language processing encoders for spanish. Inf Process Manag 61(3):103667","journal-title":"Inf Process Manag"},{"key":"19320_CR42","doi-asserted-by":"crossref","unstructured":"Zhan F, Yu Y, Wu R, Zhang J, Lu S, Liu L, ... Xing E (2023) Multimodal image synthesis and editing: The generative ai era","DOI":"10.1109\/TPAMI.2023.3305243"},{"key":"19320_CR43","unstructured":"Zhang H, Goodfellow I, Metaxas D, Odena A (2019) Self-attention generative adversarial networks. International conference on machine learning, pp 7354\u20137363"},{"key":"19320_CR44","doi-asserted-by":"crossref","unstructured":"Zhang H, Xu T, Li H, Zhang S, Wang X, Huang X, Metaxas DN (2017) Stackgan: Text to photo-realistic image synthesis with stacked generative adversarial networks. Proceedings of the ieee international conference on computer vision, pp 5907\u20135915","DOI":"10.1109\/ICCV.2017.629"},{"issue":"8","key":"19320_CR45","doi-asserted-by":"publisher","first-page":"1947","DOI":"10.1109\/TPAMI.2018.2856256","volume":"41","author":"H Zhang","year":"2018","unstructured":"Zhang H, Xu T, Li H, Zhang S, Wang X, Huang X, Metaxas DN (2018) Stackgan++: Realistic image synthesis with stacked generative adversarial networks. IEEE Trans Pattern Anal Mach Intell 41(8):1947\u20131962","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"19320_CR46","doi-asserted-by":"crossref","unstructured":"Zhu M, Pan P, Chen W, Yang Y (2019) Dm-gan: Dynamic memory generative adversarial networks for text-to-image synthesis. Proceedings of the ieee\/cvf conference on computer vision and pattern recognition, pp 5802\u20135810","DOI":"10.1109\/CVPR.2019.00595"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-024-19320-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-024-19320-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-024-19320-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,1]],"date-time":"2025-05-01T05:01:40Z","timestamp":1746075700000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-024-19320-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,15]]},"references-count":46,"journal-issue":{"issue":"12","published-online":{"date-parts":[[2025,4]]}},"alternative-id":["19320"],"URL":"https:\/\/doi.org\/10.1007\/s11042-024-19320-7","relation":{},"ISSN":["1573-7721"],"issn-type":[{"type":"electronic","value":"1573-7721"}],"subject":[],"published":{"date-parts":[[2024,5,15]]},"assertion":[{"value":"24 October 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 March 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 April 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 May 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}