{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T00:30:54Z","timestamp":1775176254158,"version":"3.50.1"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2024,8,5]],"date-time":"2024-08-05T00:00:00Z","timestamp":1722816000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,8,5]],"date-time":"2024-08-05T00:00:00Z","timestamp":1722816000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62166025"],"award-info":[{"award-number":["62166025"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62166025"],"award-info":[{"award-number":["62166025"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62166025"],"award-info":[{"award-number":["62166025"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62166025"],"award-info":[{"award-number":["62166025"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62166025"],"award-info":[{"award-number":["62166025"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Science and Technology Project of Gansu Province","award":["21YF5GA073"],"award-info":[{"award-number":["21YF5GA073"]}]},{"name":"Science and Technology Project of Gansu Province","award":["21YF5GA073"],"award-info":[{"award-number":["21YF5GA073"]}]},{"name":"Science and Technology Project of Gansu Province","award":["21YF5GA073"],"award-info":[{"award-number":["21YF5GA073"]}]},{"name":"Science and Technology Project of Gansu Province","award":["21YF5GA073"],"award-info":[{"award-number":["21YF5GA073"]}]},{"name":"Science and Technology Project of Gansu Province","award":["21YF5GA073"],"award-info":[{"award-number":["21YF5GA073"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Vis Comput"],"published-print":{"date-parts":[[2025,3]]},"DOI":"10.1007\/s00371-024-03585-y","type":"journal-article","created":{"date-parts":[[2024,8,5]],"date-time":"2024-08-05T03:11:46Z","timestamp":1722827506000},"page":"3017-3035","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["M-GAN: multiattribute learning and multimodal feature fusion-based generative adversarial network for text-to-image synthesis"],"prefix":"10.1007","volume":"41","author":[{"given":"Hong","family":"Zhao","sequence":"first","affiliation":[]},{"given":"Wengai","family":"Li","sequence":"additional","affiliation":[]},{"given":"Dailin","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Jinhai","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Lijun","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,8,5]]},"reference":[{"issue":"1","key":"3585_CR1","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1109\/MSP.2017.2765202","volume":"35","author":"A Creswell","year":"2018","unstructured":"Creswell, A., White, T., Dumoulin, V., Arulkumaran, K., Sengupta, B., Bharath, A.A.: Generative adversarial networks: an overview. IEEE Signal Process. Mag. 35(1), 53\u201365 (2018)","journal-title":"IEEE Signal Process. Mag."},{"issue":"1","key":"3585_CR2","doi-asserted-by":"publisher","first-page":"2033","DOI":"10.1002\/cav.2033","volume":"33","author":"A Messaci","year":"2022","unstructured":"Messaci, A., Zenati, N., Belhocine, M., Otmane, S.: Zoom-fwd: efficient technique for 3d gestual interaction with distant and occluded objects in virtual reality. Comput. Animat. Virtual Worlds 33(1), 2033 (2022)","journal-title":"Comput. Animat. Virtual Worlds"},{"key":"3585_CR3","doi-asserted-by":"crossref","unstructured":"Luo, M., Zeng, Y., Banerjee, P., Baral, C.: Weakly-supervised visual-retriever-reader for knowledge-based question answering. arXiv preprint arXiv:2109.04014 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.517"},{"key":"3585_CR4","doi-asserted-by":"crossref","unstructured":"Yuan, K., Guo, S., Liu, Z., Zhou, A., Yu, F., Wu, W.: Incorporating convolution designs into visual transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 579\u2013588 (2021)","DOI":"10.1109\/ICCV48922.2021.00062"},{"issue":"1","key":"3585_CR5","doi-asserted-by":"publisher","first-page":"163","DOI":"10.1109\/TII.2021.3085669","volume":"18","author":"J Li","year":"2021","unstructured":"Li, J., Chen, J., Sheng, B., Li, P., Yang, P., Feng, D.D., Qi, J.: Automatic detection and classification system of domestic waste via multimodel cascaded convolutional neural network. IEEE Trans. Ind. Inf. 18(1), 163\u2013173 (2021)","journal-title":"IEEE Trans. Ind. Inf."},{"issue":"3\u20134","key":"3585_CR6","doi-asserted-by":"publisher","first-page":"2096","DOI":"10.1002\/cav.2096","volume":"33","author":"C Zhu","year":"2022","unstructured":"Zhu, C., Wang, H., Xiao, Y., Dai, Y., Liu, Z., Zou, B.: Ovs-net: an effective feature extraction network for optical coherence tomography angiography vessel segmentation. Comput. Animat. Virtual Worlds 33(3\u20134), 2096 (2022)","journal-title":"Comput. Animat. Virtual Worlds"},{"key":"3585_CR7","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1016\/j.neunet.2021.07.019","volume":"144","author":"S Frolov","year":"2021","unstructured":"Frolov, S., Hinz, T., Raue, F., Hees, J., Dengel, A.: Adversarial text-to-image synthesis: a review. Neural Netw. 144, 187\u2013209 (2021)","journal-title":"Neural Netw."},{"issue":"3","key":"3585_CR8","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3446374","volume":"54","author":"D Saxena","year":"2021","unstructured":"Saxena, D., Cao, J.: Generative adversarial networks (GANs) challenges, solutions, and future directions. ACM Comput. Surv. (CSUR) 54(3), 1\u201342 (2021)","journal-title":"ACM Comput. Surv. (CSUR)"},{"key":"3585_CR9","doi-asserted-by":"publisher","first-page":"1275","DOI":"10.1109\/TIP.2020.3026728","volume":"30","author":"H Tan","year":"2020","unstructured":"Tan, H., Liu, X., Liu, M., Yin, B., Li, X.: KT-GAN: Knowledge-transfer generative adversarial network for text-to-image synthesis. IEEE Trans. Image Process. 30, 1275\u20131290 (2020)","journal-title":"IEEE Trans. Image Process."},{"key":"3585_CR10","doi-asserted-by":"crossref","unstructured":"Zhu, M., Pan, P., Chen, W., Yang, Y.: Dm-gan: Dynamic memory generative adversarial networks for text-to-image synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5802\u20135810 (2019)","DOI":"10.1109\/CVPR.2019.00595"},{"key":"3585_CR11","doi-asserted-by":"crossref","unstructured":"Liang, J., Pei, W., Lu, F.: Cpgan: Content-parsing generative adversarial networks for text-to-image synthesis. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part IV 16, pp. 491\u2013508 (2020). Springer","DOI":"10.1007\/978-3-030-58548-8_29"},{"key":"3585_CR12","doi-asserted-by":"crossref","unstructured":"Jiang, N., Sheng, B., Li, P., Lee, T.-Y.: Photohelper: Portrait photographing guidance via deep feature retrieval and fusion. IEEE Trans. Multimed. (2022)","DOI":"10.1109\/TMM.2022.3144890"},{"key":"3585_CR13","doi-asserted-by":"publisher","first-page":"567","DOI":"10.1109\/TMM.2021.3055362","volume":"24","author":"Q Xu","year":"2021","unstructured":"Xu, Q., Mei, Y., Liu, J., Li, C.: Multimodal cross-layer bilinear pooling for RGBT tracking. IEEE Trans. Multimed. 24, 567\u2013580 (2021)","journal-title":"IEEE Trans. Multimed."},{"key":"3585_CR14","doi-asserted-by":"crossref","unstructured":"Karras, T., Laine, S., Aila, T.: A style-based generator architecture for generative adversarial networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4401\u20134410 (2019)","DOI":"10.1109\/CVPR.2019.00453"},{"key":"3585_CR15","unstructured":"Reed, S., Akata, Z., Yan, X., Logeswaran, L., Schiele, B., Lee, H.: Generative adversarial text to image synthesis. In: International Conference on Machine Learning, pp. 1060\u20131069. PMLR (2016)"},{"key":"3585_CR16","unstructured":"Dash, A., Gamboa, J.C.B., Ahmed, S., Liwicki, M., Afzal, M.Z.: Tac-gan-text conditioned auxiliary classifier generative adversarial network. arXiv preprint arXiv:1703.06412 (2017)"},{"key":"3585_CR17","doi-asserted-by":"crossref","unstructured":"Zhang, H., Xu, T., Li, H., Zhang, S., Wang, X., Huang, X., Metaxas, D.N.: Stackgan: Text to photo-realistic image synthesis with stacked generative adversarial networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 5907\u20135915 (2017)","DOI":"10.1109\/ICCV.2017.629"},{"issue":"8","key":"3585_CR18","doi-asserted-by":"publisher","first-page":"1947","DOI":"10.1109\/TPAMI.2018.2856256","volume":"41","author":"H Zhang","year":"2018","unstructured":"Zhang, H., Xu, T., Li, H., Zhang, S., Wang, X., Huang, X., Metaxas, D.N.: Stackgan++: realistic image synthesis with stacked generative adversarial networks. IEEE Trans. Pattern Anal. Mach. Intell. 41(8), 1947\u20131962 (2018)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"4","key":"3585_CR19","first-page":"1283","volume":"39","author":"Y Zhang","year":"2023","unstructured":"Zhang, Y., Han, S., Zhang, Z., Wang, J., Bi, H.: CF-GAN: cross-domain feature fusion generative adversarial network for text-to-image synthesis. Vis. Comput. 39(4), 1283\u20131293 (2023)","journal-title":"Vis. Comput."},{"key":"3585_CR20","doi-asserted-by":"crossref","unstructured":"Qiao, T., Zhang, J., Xu, D., Tao, D.: Mirrorgan: Learning text-to-image generation by redescription. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1505\u20131514 (2019)","DOI":"10.1109\/CVPR.2019.00160"},{"key":"3585_CR21","unstructured":"Lin, X., Sun, S., Huang, W., Sheng, B., Li, P., Feng, D.D.: Eapt: efficient attention pyramid transformer for image processing. IEEE Trans. Multimed. (2021)"},{"key":"3585_CR22","doi-asserted-by":"crossref","unstructured":"Xu, T., Zhang, P., Huang, Q., Zhang, H., Gan, Z., Huang, X., He, X.: Attngan: Fine-grained text to image generation with attentional generative adversarial networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1316\u20131324 (2018)","DOI":"10.1109\/CVPR.2018.00143"},{"key":"3585_CR23","doi-asserted-by":"crossref","unstructured":"Zhang, H., Koh, J.Y., Baldridge, J., Lee, H., Yang, Y.: Cross-modal contrastive learning for text-to-image generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 833\u2013842 (2021)","DOI":"10.1109\/CVPR46437.2021.00089"},{"key":"3585_CR24","first-page":"1","volume":"1","author":"B Jiang","year":"2023","unstructured":"Jiang, B., Zeng, W., Yang, C., Wang, R., Zhang, B.: DE-GAN: text-to-image synthesis with dual and efficient fusion model. Multimed. Tools Appl. 1, 1\u201314 (2023)","journal-title":"Multimed. Tools Appl."},{"key":"3585_CR25","doi-asserted-by":"crossref","unstructured":"Yin, G., Liu, B., Sheng, L., Yu, N., Wang, X., Shao, J.: Semantics disentangling for text-to-image generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2327\u20132336 (2019)","DOI":"10.1109\/CVPR.2019.00243"},{"key":"3585_CR26","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Zhang, R., Chen, C., Li, C., Tensmeyer, C., Yu, T., Gu, J., Xu, J., Sun, T.: Lafite: Towards language-free training for text-to-image generation. arXiv preprint arXiv:2111.13792 (2021)","DOI":"10.1109\/CVPR52688.2022.01738"},{"key":"3585_CR27","doi-asserted-by":"crossref","unstructured":"Tao, M., Tang, H., Wu, F., Jing, X.-Y., Bao, B.-K., Xu, C.: Df-gan: A simple and effective baseline for text-to-image synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16515\u201316525 (2022)","DOI":"10.1109\/CVPR52688.2022.01602"},{"key":"3585_CR28","doi-asserted-by":"crossref","unstructured":"Tan, H., Liu, X., Yin, B., Li, X.: DR-GAN: Distribution regularization for text-to-image generation. IEEE Trans. Neural Networks Learn. Syst. (2022)","DOI":"10.1109\/TNNLS.2022.3165573"},{"key":"3585_CR29","doi-asserted-by":"crossref","unstructured":"Ruan, S., Zhang, Y., Zhang, K., Fan, Y., Tang, F., Liu, Q., Chen, E.: Dae-gan: Dynamic aspect-aware gan for text-to-image synthesis. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 13960\u201313969 (2021)","DOI":"10.1109\/ICCV48922.2021.01370"},{"issue":"1","key":"3585_CR30","doi-asserted-by":"publisher","first-page":"54","DOI":"10.1007\/s44196-023-00233-6","volume":"16","author":"S Lu","year":"2023","unstructured":"Lu, S., Ding, Y., Liu, M., Yin, Z., Yin, L., Zheng, W.: Multiscale feature extraction and fusion of image and text in VQA. Int. J. Comput. Intell. Syst. 16(1), 54 (2023)","journal-title":"Int. J. Comput. Intell. Syst."},{"issue":"4","key":"3585_CR31","doi-asserted-by":"publisher","first-page":"664","DOI":"10.26599\/TST.2021.9010055","volume":"27","author":"C Peng","year":"2021","unstructured":"Peng, C., Zhang, C., Xue, X., Gao, J., Liang, H., Niu, Z.: Cross-modal complementary network with hierarchical fusion for multimodal sentiment classification. Tsinghua Sci. Technol. 27(4), 664\u2013679 (2021)","journal-title":"Tsinghua Sci. Technol."},{"key":"3585_CR32","doi-asserted-by":"crossref","unstructured":"Wu, Y., Zhan, P., Zhang, Y., Wang, L., Xu, Z.: Multimodal fusion with co-attention networks for fake news detection. In: Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021, pp. 2560\u20132569 (2021)","DOI":"10.18653\/v1\/2021.findings-acl.226"},{"key":"3585_CR33","doi-asserted-by":"crossref","unstructured":"Xue, Z., Marculescu, R.: Dynamic multimodal fusion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2574\u20132583 (2023)","DOI":"10.1109\/CVPRW59228.2023.00256"},{"key":"3585_CR34","first-page":"32942","volume":"35","author":"Z-Y Dou","year":"2022","unstructured":"Dou, Z.-Y., Kamath, A., Gan, Z., Zhang, P., Wang, J., Li, L., Liu, Z., Liu, C., LeCun, Y., Peng, N., et al.: Coarse-to-fine vision-language pre-training with fusion in the backbone. Adv. Neural. Inf. Process. Syst. 35, 32942\u201332956 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"3585_CR35","unstructured":"Welinder, P., Branson, S., Mita, T., Wah, C., Schroff, F., Belongie, S., Perona, P.: The caltech-ucsd birds-200-2011 dataset. California Inst. Technol., Pasadena, CA, USA, Tech. Rep. CaltechAUTHORS, 20111026\u2013120541847 (2011)"},{"key":"3585_CR36","unstructured":"Xia, X., Xu, C., Nan, B.: Inception-v3 for flower classification. In: 2017 2nd International Conference on Image, Vision and Computing (ICIVC), pp. 783\u2013787. IEEE (2017)"},{"key":"3585_CR37","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., Zitnick, C.L.: Microsoft coco: Common objects in context. In: Computer Vision\u2013ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6\u201312, 2014, Proceedings, Part V 13, pp. 740\u2013755. Springer (2014)","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"3585_CR38","doi-asserted-by":"crossref","unstructured":"Chong, M.J., Forsyth, D.: Effectively unbiased fid and inception score and where to find them. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6070\u20136079 (2020)","DOI":"10.1109\/CVPR42600.2020.00611"},{"key":"3585_CR39","doi-asserted-by":"crossref","unstructured":"Hinz, T., Fisher, M., Wang, O., Wermter, S.: Improved techniques for training single-image GANs. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 1300\u20131309 (2021)","DOI":"10.1109\/WACV48630.2021.00134"},{"key":"3585_CR40","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A.A., Shechtman, E., Wang, O.: The unreasonable effectiveness of deep features as a perceptual metric. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 586\u2013595 (2018)","DOI":"10.1109\/CVPR.2018.00068"},{"key":"3585_CR41","doi-asserted-by":"crossref","unstructured":"Cheng, Q., Wen, K., Gu, X.: Vision-language matching for text-to-image synthesis via generative adversarial networks. IEEE Trans. Multimed. (2022)","DOI":"10.1109\/TMM.2022.3217384"},{"key":"3585_CR42","unstructured":"Ramesh, A., Pavlov, M., Goh, G., Gray, S., Voss, C., Radford, A., Chen, M., Sutskever, I.: Zero-shot text-to-image generation. In: International Conference on Machine Learning, pp. 8821\u20138831. PMLR (2021)"},{"key":"3585_CR43","doi-asserted-by":"crossref","unstructured":"Zhao, L., Huang, P., Chen, T., Fu, C., Hu, Q., Zhang, Y.: Multi-sentence complementarily generation for text-to-image synthesis. IEEE Trans. Multimed. (2023)","DOI":"10.1109\/TMM.2023.3297769"},{"key":"3585_CR44","doi-asserted-by":"crossref","unstructured":"Sheng, Y., Tao, M., Wang, J., Bao, B.-K.: ISF-GAN: Imagine, select, and fuse with GPT-based text enrichment for text-to-image synthesis. ACM Trans. Multimed. Comput. Commun. Appl. (2024)","DOI":"10.1145\/3650033"},{"key":"3585_CR45","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TGRS.2022.3219726","volume":"60","author":"J Chen","year":"2022","unstructured":"Chen, J., Guo, Y., Zhu, J., Sun, G., Qin, D., Deng, M., Liu, H.: Improving few-shot remote sensing scene classification with class name semantics. IEEE Trans. Geosci. Rem. Sens. 60, 1 (2022). https:\/\/doi.org\/10.1109\/TGRS.2022.3219726","journal-title":"IEEE Trans. Geosci. Rem. Sens."}],"container-title":["The Visual Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-024-03585-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00371-024-03585-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-024-03585-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,10]],"date-time":"2025-03-10T09:09:59Z","timestamp":1741597799000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00371-024-03585-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,5]]},"references-count":45,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2025,3]]}},"alternative-id":["3585"],"URL":"https:\/\/doi.org\/10.1007\/s00371-024-03585-y","relation":{},"ISSN":["0178-2789","1432-2315"],"issn-type":[{"value":"0178-2789","type":"print"},{"value":"1432-2315","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,8,5]]},"assertion":[{"value":"16 July 2024","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 August 2024","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"All authors certify that they have no affiliations with or involvement in any organization or entity with any financial interest or non-financial interest in the subject matter or materials discussed in this manuscript.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}