{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,3]],"date-time":"2025-12-03T18:11:57Z","timestamp":1764785517700,"version":"3.37.3"},"reference-count":47,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2024,11,22]],"date-time":"2024-11-22T00:00:00Z","timestamp":1732233600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,22]],"date-time":"2024-11-22T00:00:00Z","timestamp":1732233600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"National Key R&D Project of China","award":["No.2021QY2102","No.2021QY2102","No.2021QY2102","No.2021QY2102","No.2021QY2102"],"award-info":[{"award-number":["No.2021QY2102","No.2021QY2102","No.2021QY2102","No.2021QY2102","No.2021QY2102"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["No.62172089","No.61972087","No.62172090","No.62106045","No.62172458"],"award-info":[{"award-number":["No.62172089","No.61972087","No.62172090","No.62106045","No.62172458"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100004608","name":"Natural Science Foundation of Jiangsu Province","doi-asserted-by":"crossref","award":["No.BK20191258","No.BK20191258","No.BK20191258","No.BK20191258","No.BK20191258"],"award-info":[{"award-number":["No.BK20191258","No.BK20191258","No.BK20191258","No.BK20191258","No.BK20191258"]}],"id":[{"id":"10.13039\/501100004608","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Jiangsu Provincial Key Laboratory of Computer Networking Technology"},{"name":"Jiangsu Provincial Key Laboratory of Network and Information Security","award":["No. BM2003201","No. BM2003201","No. BM2003201","No. BM2003201","No. BM2003201"],"award-info":[{"award-number":["No. BM2003201","No. BM2003201","No. BM2003201","No. BM2003201","No. BM2003201"]}]},{"name":"Key Laboratory of Computer Network and Information Integration of Ministry of Education of China","award":["No. 93K-9","No. 93K-9","No. 93K-9","No. 93K-9","No. 93K-9"],"award-info":[{"award-number":["No. 93K-9","No. 93K-9","No. 93K-9","No. 93K-9","No. 93K-9"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Machine Vision and Applications"],"published-print":{"date-parts":[[2025,1]]},"DOI":"10.1007\/s00138-024-01624-1","type":"journal-article","created":{"date-parts":[[2024,11,22]],"date-time":"2024-11-22T03:31:58Z","timestamp":1732246318000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Text-to-face synthesis based on facial landmarks prediction"],"prefix":"10.1007","volume":"36","author":[{"given":"Kun","family":"Wang","sequence":"first","affiliation":[]},{"given":"Lei","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Biwei","family":"Cao","sequence":"additional","affiliation":[]},{"given":"Bo","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Jiuxin","family":"Cao","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,22]]},"reference":[{"issue":"11","key":"1624_CR1","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1145\/3422622","volume":"63","author":"I Goodfellow","year":"2020","unstructured":"Goodfellow, I., Pouget-Abadie, J., Mirza, M., Xu, B., Warde-Farley, D., Ozair, S., Courville, A., Bengio, Y.: Generative adversarial networks. Commun. ACM 63(11), 139\u2013144 (2020)","journal-title":"Commun. ACM"},{"key":"1624_CR2","unstructured":"Mirza, M., Osindero, S.: Conditional generative adversarial nets. arXiv preprint arXiv:1411.1784 (2014)"},{"key":"1624_CR3","doi-asserted-by":"crossref","unstructured":"Zhang, H., Xu, T., Li, H., Zhang, S., Wang, X., Huang, X., Metaxas, D.N.: Stackgan: Text to photo-realistic image synthesis with stacked generative adversarial networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 5907\u20135915 (2017). IEEE","DOI":"10.1109\/ICCV.2017.629"},{"key":"1624_CR4","doi-asserted-by":"crossref","unstructured":"Xu, T., Zhang, P., Huang, Q., Zhang, H., Gan, Z., Huang, X., He, X.: Attngan: Fine-grained text to image generation with attentional generative adversarial networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1316\u20131324 (2018). IEEE","DOI":"10.1109\/CVPR.2018.00143"},{"key":"1624_CR5","unstructured":"Van Oord, A., Kalchbrenner, N., Kavukcuoglu, K.: Pixel recurrent neural networks. In: International Conference on Machine Learning, pp. 1747\u20131756 (2016). PMLR"},{"key":"1624_CR6","unstructured":"Reed, S., Akata, Z., Yan, X., Logeswaran, L., Schiele, B., Lee, H.: Generative adversarial text to image synthesis. In: International Conference on Machine Learning, pp. 1060\u20131069 (2016). PMLR"},{"key":"1624_CR7","doi-asserted-by":"crossref","unstructured":"Hong, S., Yang, D., Choi, J., Lee, H.: Inferring semantic layout for hierarchical text-to-image synthesis. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7986\u20137994 (2018). IEEE","DOI":"10.1109\/CVPR.2018.00833"},{"key":"1624_CR8","doi-asserted-by":"crossref","unstructured":"Li, W., Zhang, P., Zhang, L., Huang, Q., He, X., Lyu, S., Gao, J.: Object-driven text-to-image synthesis via adversarial training. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12174\u201312182 (2019). IEEE","DOI":"10.1109\/CVPR.2019.01245"},{"key":"1624_CR9","doi-asserted-by":"crossref","unstructured":"Zhu, M., Pan, P., Chen, W., Yang, Y.: Dm-gan: Dynamic memory generative adversarial networks for text-to-image synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5802\u20135810 (2019). IEEE","DOI":"10.1109\/CVPR.2019.00595"},{"key":"1624_CR10","unstructured":"Li, B., Qi, X., Lukasiewicz, T., Torr, P.H.: Controllable text-to-image generation. arXiv preprint arXiv:1909.07083 (2019)"},{"key":"1624_CR11","unstructured":"Tao, M., Tang, H., Wu, S., Sebe, N., Jing, X.-Y., Wu, F., Bao, B.: Df-gan: Deep fusion generative adversarial networks for text-to-image synthesis. arXiv preprint arXiv:2008.05865 (2020)"},{"key":"1624_CR12","doi-asserted-by":"crossref","unstructured":"Liao, W., Hu, K., Yang, M.Y., Rosenhahn, B.: Text to image generation with semantic-spatial aware gan. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18187\u201318196 (2022)","DOI":"10.1109\/CVPR52688.2022.01765"},{"key":"1624_CR13","doi-asserted-by":"publisher","first-page":"462","DOI":"10.1109\/TMM.2023.3266607","volume":"26","author":"S Ye","year":"2023","unstructured":"Ye, S., Wang, H., Tan, M., Liu, F.: Recurrent affine transformation for text-to-image synthesis. IEEE Trans. Multimed. 26, 462\u2013473 (2023)","journal-title":"IEEE Trans. Multimed."},{"key":"1624_CR14","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2024.111795","volume":"294","author":"D Jin","year":"2024","unstructured":"Jin, D., Yu, Q., Yu, L., Qi, M.: Saw-gan: multi-granularity text fusion generative adversarial networks for text-to-image generation. Knowl.-Based Syst. 294, 111795 (2024)","journal-title":"Knowl.-Based Syst."},{"key":"1624_CR15","doi-asserted-by":"publisher","first-page":"6956","DOI":"10.1109\/TMM.2024.3358086","volume":"26","author":"B Yang","year":"2024","unstructured":"Yang, B., Xiang, X., Kong, W., Zhang, J., Peng, Y.: Dmf-gan: deep multimodal fusion generative adversarial networks for text-to-image synthesis. IEEE Trans. Multimed. 26, 6956\u20136967 (2024)","journal-title":"IEEE Trans. Multimed."},{"key":"1624_CR16","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. Adv. Neural. Inf. Process. Syst. 33, 6840\u20136851 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1624_CR17","unstructured":"Nichol, A.Q., Dhariwal, P., Ramesh, A., Shyam, P., Mishkin, P., Mcgrew, B., Sutskever, I., Chen, M.: Glide: Towards photorealistic image generation and editing with text-guided diffusion models. In: International Conference on Machine Learning, pp. 16784\u201316804 (2022). PMLR"},{"key":"1624_CR18","first-page":"36479","volume":"35","author":"C Saharia","year":"2022","unstructured":"Saharia, C., Chan, W., Saxena, S., Li, L., Whang, J., Denton, E.L., Ghasemipour, K., Gontijo Lopes, R., Karagol Ayan, B., Salimans, T., et al.: Photorealistic text-to-image diffusion models with deep language understanding. Adv. Neural. Inf. Process. Syst. 35, 36479\u201336494 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1624_CR19","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"1624_CR20","doi-asserted-by":"crossref","unstructured":"Phung, Q., Ge, S., Huang, J.-B.: Grounded text-to-image synthesis with attention refocusing. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7932\u20137942 (2024)","DOI":"10.1109\/CVPR52733.2024.00758"},{"key":"1624_CR21","doi-asserted-by":"crossref","unstructured":"Wang, R., Chen, Z., Chen, C., Ma, J., Lu, H., Lin, X.: Compositional text-to-image synthesis with attention map control of diffusion models. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 38, pp. 5544\u20135552 (2024)","DOI":"10.1609\/aaai.v38i6.28364"},{"key":"1624_CR22","unstructured":"Gauthier, J.: Conditional generative adversarial nets for convolutional face generation. Class Project for Stanford CS231N: Convolutional Neural Networks for Visual Recognition, Winter semester 2014(5), 2 (2014)"},{"key":"1624_CR23","unstructured":"Di, X., Patel, V.M.: Face synthesis from visual attributes via sketch using conditional vaes and gans. arXiv preprint arXiv:1801.00077 (2017)"},{"key":"1624_CR24","doi-asserted-by":"crossref","unstructured":"Huang, R., Zhang, S., Li, T., He, R.: Beyond face rotation: Global and local perception gan for photorealistic and identity preserving frontal view synthesis. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2439\u20132448 (2017). IEEE","DOI":"10.1109\/ICCV.2017.267"},{"key":"1624_CR25","doi-asserted-by":"crossref","unstructured":"Di, X., Sindagi, V.A., Patel, V.M.: Gp-gan: Gender preserving gan for synthesizing faces from landmarks. In: 2018 24th International Conference on Pattern Recognition (ICPR), pp. 1079\u20131084 (2018). IEEE","DOI":"10.1109\/ICPR.2018.8545081"},{"key":"1624_CR26","unstructured":"Sun, P., Li, Y., Qi, H., Lyu, S.: Landmarkgan: Synthesizing faces from landmarks. arXiv preprint arXiv:2011.00269 (2020)"},{"key":"1624_CR27","doi-asserted-by":"crossref","unstructured":"Lu, Y., Wu, S., Tai, Y.-W., Tang, C.-K.: Image generation from sketch constraint using contextual gan. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 205\u2013220 (2018). ECCV","DOI":"10.1007\/978-3-030-01270-0_13"},{"key":"1624_CR28","doi-asserted-by":"publisher","first-page":"8797","DOI":"10.1109\/TIP.2021.3120669","volume":"30","author":"S Yang","year":"2021","unstructured":"Yang, S., Wang, Z., Liu, J., Guo, Z.: Controllable sketch-to-image translation for robust face synthesis. IEEE Trans. Image Process. 30, 8797\u20138810 (2021)","journal-title":"IEEE Trans. Image Process."},{"key":"1624_CR29","doi-asserted-by":"crossref","unstructured":"Wang, Y., Chang, L., Cheng, Y., Jin, L., Cheng, Z., Deng, X., Duan, F.: Text2sketch: Learning face sketch from facial attribute text. In: 2018 25th IEEE International Conference on Image Processing (ICIP), pp. 669\u2013673 (2018). IEEE","DOI":"10.1109\/ICIP.2018.8451236"},{"key":"1624_CR30","doi-asserted-by":"crossref","unstructured":"Nasir, O.R., Jha, S.K., Grover, M.S., Yu, Y., Kumar, A., Shah, R.R.: Text2facegan: Face generation from fine grained textual descriptions. In: 2019 IEEE Fifth International Conference on Multimedia Big Data (BigMM), pp. 58\u201367 (2019). IEEE","DOI":"10.1109\/BigMM.2019.00-42"},{"key":"1624_CR31","unstructured":"Chen, X., Qing, L., He, X., Luo, X., Xu, Y.: Ftgan: A fully-trained generative adversarial networks for text to face generation. arXiv preprint arXiv:1904.05729 (2019)"},{"key":"1624_CR32","doi-asserted-by":"crossref","unstructured":"Qiao, X., Han, Y., Wu, Y., Zhang, Z.: Progressive text-to-face synthesis with generative adversarial network. In: 2021 16th IEEE International Conference on Automatic Face and Gesture Recognition (FG 2021), pp. 1\u20138 (2021). IEEE","DOI":"10.1109\/FG52635.2021.9667004"},{"key":"1624_CR33","doi-asserted-by":"crossref","unstructured":"Wang, T., Zhang, T., Lovell, B.: Faces a la carte: Text-to-face generation via attribute disentanglement. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 3380\u20133388 (2021). IEEE","DOI":"10.1109\/WACV48630.2021.00342"},{"key":"1624_CR34","doi-asserted-by":"crossref","unstructured":"Xia, W., Yang, Y., Xue, J.-H., Wu, B.: Tedigan: Text-guided diverse face image generation and manipulation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2256\u20132265 (2021). IEEE","DOI":"10.1109\/CVPR46437.2021.00229"},{"issue":"3\u20134","key":"1624_CR35","doi-asserted-by":"publisher","first-page":"2155","DOI":"10.1002\/cav.2155","volume":"34","author":"R Wei","year":"2023","unstructured":"Wei, R., Wang, P.: Setgan: semantic-text guided face image generation. Comput. Anim. Virtual Worlds 34(3\u20134), 2155 (2023)","journal-title":"Comput. Anim. Virtual Worlds"},{"key":"1624_CR36","doi-asserted-by":"crossref","unstructured":"Wei, S.-E., Ramakrishna, V., Kanade, T., Sheikh, Y.: Convolutional pose machines. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4724\u20134732 (2016). IEEE","DOI":"10.1109\/CVPR.2016.511"},{"key":"1624_CR37","doi-asserted-by":"crossref","unstructured":"Zhao, H., Shi, J., Qi, X., Wang, X., Jia, J.: Pyramid scene parsing network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2881\u20132890 (2017). IEEE","DOI":"10.1109\/CVPR.2017.660"},{"key":"1624_CR38","doi-asserted-by":"crossref","unstructured":"Schroff, F., Kalenichenko, D., Philbin, J.: Facenet: A unified embedding for face recognition and clustering. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 815\u2013823 (2015). IEEE","DOI":"10.1109\/CVPR.2015.7298682"},{"key":"1624_CR39","doi-asserted-by":"crossref","unstructured":"Szegedy, C., Ioffe, S., Vanhoucke, V., Alemi, A.A.: Inception-v4, inception-resnet and the impact of residual connections on learning. In: Thirty-first AAAI Conference on Artificial Intelligence (2017). AAAI","DOI":"10.1609\/aaai.v31i1.11231"},{"key":"1624_CR40","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R.: Mask r-cnn. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2961\u20132969 (2017). IEEE","DOI":"10.1109\/ICCV.2017.322"},{"issue":"11","key":"1624_CR41","doi-asserted-by":"publisher","first-page":"2673","DOI":"10.1109\/78.650093","volume":"45","author":"M Schuster","year":"1997","unstructured":"Schuster, M., Paliwal, K.K.: Bidirectional recurrent neural networks. IEEE Trans. Signal Process. 45(11), 2673\u20132681 (1997)","journal-title":"IEEE Trans. Signal Process."},{"issue":"3","key":"1624_CR42","doi-asserted-by":"publisher","first-page":"257","DOI":"10.1109\/89.568732","volume":"5","author":"B-H Juang","year":"1997","unstructured":"Juang, B.-H., Hou, W., Lee, C.-H.: Minimum classification error rate methods for speech recognition. IEEE Trans. Speech Audio Process. 5(3), 257\u2013265 (1997)","journal-title":"IEEE Trans. Speech Audio Process."},{"key":"1624_CR43","unstructured":"Gatt, A., Tanti, M., Muscat, A., Paggio, P., Farrugia, R.A., Borg, C., Camilleri, K.P., Rosner, M., Plas, L.: Face2text: collecting an annotated image description corpus for the generation of rich face descriptions. arXiv preprint arXiv:1803.03827 (2018)"},{"key":"1624_CR44","unstructured":"Liu, Z., Luo, P., Wang, X., Tang, X.: Large-scale celebfaces attributes (celeba) dataset. Retrieved August 15(2018), 11 (2018)"},{"key":"1624_CR45","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., Hochreiter, S.: Gans trained by a two time-scale update rule converge to a local nash equilibrium. Adv. Inf. Process. Syst. 30, (2017)"},{"key":"1624_CR46","unstructured":"Ioffe, S., Szegedy, C.: Batch normalization: Accelerating deep network training by reducing internal covariate shift. In: International Conference on Machine Learning, pp. 448\u2013456 (2015). PMLR"},{"key":"1624_CR47","unstructured":"Miyato, T., Kataoka, T., Koyama, M., Yoshida, Y.: Spectral normalization for generative adversarial networks. arXiv preprint arXiv:1802.05957 (2018)"}],"container-title":["Machine Vision and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-024-01624-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00138-024-01624-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-024-01624-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,18]],"date-time":"2025-01-18T10:30:28Z","timestamp":1737196228000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00138-024-01624-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,22]]},"references-count":47,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2025,1]]}},"alternative-id":["1624"],"URL":"https:\/\/doi.org\/10.1007\/s00138-024-01624-1","relation":{},"ISSN":["0932-8092","1432-1769"],"issn-type":[{"type":"print","value":"0932-8092"},{"type":"electronic","value":"1432-1769"}],"subject":[],"published":{"date-parts":[[2024,11,22]]},"assertion":[{"value":"8 July 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 September 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 October 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 November 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"We declare that we have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"6"}}