{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,5]],"date-time":"2025-11-05T06:51:01Z","timestamp":1762325461511,"version":"3.37.3"},"reference-count":51,"publisher":"Springer Science and Business Media LLC","issue":"10","license":[{"start":{"date-parts":[[2022,10,29]],"date-time":"2022-10-29T00:00:00Z","timestamp":1667001600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,10,29]],"date-time":"2022-10-29T00:00:00Z","timestamp":1667001600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62072169"],"award-info":[{"award-number":["62072169"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2023,4]]},"DOI":"10.1007\/s11042-022-14080-8","type":"journal-article","created":{"date-parts":[[2022,10,29]],"date-time":"2022-10-29T01:02:45Z","timestamp":1667005365000},"page":"15061-15077","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Multi-scale dual-modal generative adversarial networks for text-to-image synthesis"],"prefix":"10.1007","volume":"82","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5840-9664","authenticated-orcid":false,"given":"Bin","family":"Jiang","sequence":"first","affiliation":[]},{"given":"Yun","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Wei","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Chao","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Fangqiang","family":"Xu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,10,29]]},"reference":[{"key":"14080_CR1","doi-asserted-by":"crossref","unstructured":"Chen Y, Liu L, Tao J, Xia R, Zhang Q, Yang K, Xiong J, Chen X (2020) The improved image inpainting algorithm via encoder and similarity constraint. Vis Comput, 1\u201315","DOI":"10.1007\/s00371-020-01932-3"},{"key":"14080_CR2","doi-asserted-by":"publisher","first-page":"112753","DOI":"10.1016\/j.eswa.2019.06.041","volume":"138","author":"Z Chen","year":"2019","unstructured":"Chen Z, Cai H, Zhang Y, Wu C, Mu M, Li Z, Sotelo MA (2019) A novel sparse representation model for pedestrian abnormal trajectory understanding. Expert Syst Appl 138:112753. https:\/\/doi.org\/10.1016\/j.eswa.2019.06.041","journal-title":"Expert Syst Appl"},{"key":"14080_CR3","doi-asserted-by":"publisher","first-page":"104812","DOI":"10.1016\/j.ssci.2020.104812","volume":"130","author":"Z Chen","year":"2020","unstructured":"Chen Z, Chen D, Zhang Y, Cheng X, Zhang M, Wu C (2020) Deep learning for autonomous ship-oriented small ship detection. Saf Sci 130:104812. https:\/\/doi.org\/10.1016\/j.ssci.2020.104812","journal-title":"Saf Sci"},{"key":"14080_CR4","doi-asserted-by":"crossref","unstructured":"Cheng J, Wu F, Tian Y, Wang L, Tao D (2020) Rifegan: rich feature generation for text-to-image synthesis from prior knowledge. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 10911\u201310920","DOI":"10.1109\/CVPR42600.2020.01092"},{"key":"14080_CR5","unstructured":"Dash A, Gamboa JCB, Ahmed S, Liwicki M, Afzal MZ (2017) Tac-gan-text conditioned auxiliary classifier generative adversarial network. arXiv:170306412"},{"key":"14080_CR6","doi-asserted-by":"crossref","unstructured":"Fan X, Jiang W, Luo H, Mao W (2020) Modality-transfer generative adversarial network and dual-level unified latent representation for visible thermal person re-identification. Vis Comput, 1\u201316","DOI":"10.1007\/s00371-020-02015-z"},{"key":"14080_CR7","doi-asserted-by":"crossref","unstructured":"Fang Z, Liu Z, Liu T, Hung CC, Xiao J, Feng G (2021) Facial expression gan for voice-driven face generation. Vis Comput, 1\u201314","DOI":"10.1007\/s00371-021-02074-w"},{"key":"14080_CR8","first-page":"8312","volume":"33","author":"L Gao","year":"2019","unstructured":"Gao L, Chen D, Song J, Xu X, Zhang D, Shen HT (2019) Perceptual pyramid adversarial networks for text-to-image synthesis. Proc AAAI Conf Artif Intell 33:8312\u20138319","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"14080_CR9","unstructured":"Goodfellow I, Pouget-Abadie J, Mirza M, Xu B, Warde-Farley D, Ozair S, Courville A, Bengio Y (2014) Generative adversarial nets. In: Advances in neural information processing systems, pp 2672\u20132680"},{"key":"14080_CR10","unstructured":"Gregor K, Danihelka I, Graves A, Rezende D, Wierstra D (2015) Draw: a recurrent neural network for image generation. In: International conference on machine learning (PMLR), pp 1462\u20131471"},{"key":"14080_CR11","doi-asserted-by":"publisher","first-page":"156828","DOI":"10.1109\/ACCESS.2020.3019826","volume":"8","author":"B Jiang","year":"2020","unstructured":"Jiang B, Huang W, Huang Y, Yang C, Xu F (2020) Deep fusion local-content and global-semantic for image inpainting. IEEE Access 8:156828\u2013156838","journal-title":"IEEE Access"},{"key":"14080_CR12","doi-asserted-by":"publisher","first-page":"5079","DOI":"10.1109\/TIP.2020.2978583","volume":"29","author":"B Jiang","year":"2020","unstructured":"Jiang B, Tu W, Yang C, Yuan J (2020) Context-integrated and feature-refined network for lightweight object parsing. IEEE Trans Image Process 29:5079\u20135093","journal-title":"IEEE Trans Image Process"},{"key":"14080_CR13","doi-asserted-by":"publisher","first-page":"205088","DOI":"10.1109\/ACCESS.2020.3037346","volume":"8","author":"B Jiang","year":"2020","unstructured":"Jiang B, Xu F, Huang Y, Yang C, Huang W, Xia J (2020) Adaptive adversarial latent space for novelty detection. IEEE Access 8:205088\u2013205098","journal-title":"IEEE Access"},{"key":"14080_CR14","doi-asserted-by":"crossref","unstructured":"Karimi M, Veni G, Yu YY (2020) Illegible text to readable text: An image-to-image transformation using conditional sliced wasserstein adversarial networks. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition workshops, pp 552\u2013553","DOI":"10.1109\/CVPRW50498.2020.00284"},{"key":"14080_CR15","doi-asserted-by":"crossref","unstructured":"Kimura D, Chaudhury S, Narita M, Munawar A, Tachibana R (2020) Adversarial discriminative attention for robust anomaly detection. In: The IEEE winter conference on applications of computer vision, pp 2172\u20132181","DOI":"10.1109\/WACV45572.2020.9093428"},{"key":"14080_CR16","unstructured":"Kingma DP, Ba J (2014) Adam: a method for stochastic optimization. arXiv:14126980"},{"key":"14080_CR17","unstructured":"Kingma DP, Welling M (2013) Auto-encoding variational Bayes. arXiv:13126114"},{"key":"14080_CR18","unstructured":"Li B, Qi X, Lukasiewicz T, Torr P (2019) Controllable text-to-image generation. In: Advances in neural information processing systems, pp 2065\u20132075"},{"key":"14080_CR19","doi-asserted-by":"crossref","unstructured":"Li B, Qi X, Lukasiewicz T, Torr PH (2020) Manigan: text-guided image manipulation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 7880\u20137889","DOI":"10.1109\/CVPR42600.2020.00790"},{"key":"14080_CR20","doi-asserted-by":"crossref","unstructured":"Li R, Wang N, Feng F, Zhang G, Wang X (2020) Exploring global and local linguistic representation for text-to-image synthesis. IEEE Transactions on Multimedia","DOI":"10.1109\/TMM.2020.2972856"},{"key":"14080_CR21","doi-asserted-by":"crossref","unstructured":"Li W, Zhang P, Zhang L, Huang Q, He X, Lyu S, Gao J (2019) Object-driven text-to-image synthesis via adversarial training. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 12174\u201312182","DOI":"10.1109\/CVPR.2019.01245"},{"key":"14080_CR22","doi-asserted-by":"crossref","unstructured":"Lin TY, Maire M, Belongie S, Hays J, Perona P, Ramanan D, Doll\u00e1r P, Zitnick CL (2014) Microsoft coco: common objects in context. In: European conference on computer vision. Springer, pp 740\u2013755","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"14080_CR23","unstructured":"Mirza M, Osindero S (2014) Conditional generative adversarial nets. arXiv:14111784"},{"key":"14080_CR24","unstructured":"Nam S, Kim Y, Kim SJ (2018) Text-adaptive generative adversarial networks: manipulating images with natural language. In: Advances in neural information processing systems, pp 42\u201351"},{"key":"14080_CR25","unstructured":"Odena A, Olah C, Shlens J (2017) Conditional image synthesis with auxiliary classifier gans. In: International conference on machine learning (PMLR), pp 2642\u20132651"},{"key":"14080_CR26","doi-asserted-by":"crossref","unstructured":"Peng D, Yang W, Liu C, L\u00fc S (2021) Sam-gan: self-attention supporting multi-stage generative adversarial networks for text-to-image synthesis. Neural Networks (8)","DOI":"10.1016\/j.neunet.2021.01.023"},{"key":"14080_CR27","doi-asserted-by":"crossref","unstructured":"Qiao T, Zhang J, Xu D, Tao D (2019) Mirrorgan: learning text-to-image generation by redescription. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1505\u20131514","DOI":"10.1109\/CVPR.2019.00160"},{"key":"14080_CR28","unstructured":"Radford A, Metz L, Chintala S (2015) Unsupervised representation learning with deep convolutional generative adversarial networks. arXiv:151106434"},{"key":"14080_CR29","unstructured":"Reed S, Akata Z, Yan X, Logeswaran L, Schiele B, Lee H (2016) Generative adversarial text to image synthesis. arXiv:160505396"},{"key":"14080_CR30","unstructured":"Reed SE, Akata Z, Mohan S, Tenka S, Schiele B, Lee H (2016) Learning what and where to draw. In: Advances in neural information processing systems, pp 217\u2013225"},{"key":"14080_CR31","doi-asserted-by":"crossref","unstructured":"Szegedy C, Vanhoucke V, Ioffe S, Shlens J, Wojna Z (2016) Rethinking the inception architecture for computer vision. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2818\u20132826","DOI":"10.1109\/CVPR.2016.308"},{"key":"14080_CR32","doi-asserted-by":"crossref","unstructured":"Tan H, Liu X, Li X, Zhang Y, Yin B (2019) Semantics-enhanced adversarial nets for text-to-image synthesis. In: Proceedings of the IEEE international conference on computer vision, pp 10501\u201310510","DOI":"10.1109\/ICCV.2019.01060"},{"key":"14080_CR33","doi-asserted-by":"crossref","unstructured":"Tao M, Tang H, Wu S, Sebe N, Wu F, Jing XY (2020) Df-gan: deep fusion generative adversarial networks for text-to-image synthesis. arXiv:200805865","DOI":"10.1109\/ICIBA50161.2020.9277299"},{"key":"14080_CR34","unstructured":"Van Oord A, Kalchbrenner N, Kavukcuoglu K (2016) Pixel recurrent neural networks. In: International conference on machine learning (PMLR), pp 1747\u20131756"},{"key":"14080_CR35","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser \u0141, Polosukhin I (2017) Attention is all you need. In: Advances in neural information processing systems, pp 5998\u20136008"},{"key":"14080_CR36","unstructured":"Wah C, Branson S, Welinder P, Perona P, Belongie S (2011) The caltech-ucsd birds-200-2011 dataset"},{"key":"14080_CR37","unstructured":"Wang Y, Yu L, van de Weijer J (2020) Deepi2i: enabling deep hierarchical image-to-image translation by transferring from gans. arXiv:201105867"},{"key":"14080_CR38","doi-asserted-by":"crossref","unstructured":"Wang Z, Quan Z, Wang ZJ, Hu X, Chen Y (2020) Text to image synthesis with bidirectional generative adversarial network. In: IEEE International conference on multimedia and expo (ICME). IEEE, pp 1\u20136","DOI":"10.1109\/ICME46284.2020.9102904"},{"key":"14080_CR39","doi-asserted-by":"crossref","unstructured":"Woo S, Park J, Lee JY, So Kweon I (2018) Cbam: convolutional block attention module. In: Proceedings of the European conference on computer vision (ECCV), pp 3\u201319","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"14080_CR40","doi-asserted-by":"crossref","unstructured":"Xia W, Yang Y, Xue JH, Wu B (2021) Tedigan: text-guided diverse face image generation and manipulation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 2256\u20132265","DOI":"10.1109\/CVPR46437.2021.00229"},{"key":"14080_CR41","doi-asserted-by":"crossref","unstructured":"Xian W, Sangkloy P, Agrawal V, Raj A, Lu J, Fang C, Yu F, Hays J (2018) Texturegan: controlling deep image synthesis with texture patches. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 8456\u20138465","DOI":"10.1109\/CVPR.2018.00882"},{"key":"14080_CR42","doi-asserted-by":"crossref","unstructured":"Xu T, Zhang P, Huang Q, Zhang H, Gan Z, Huang X, He X (2018) Attngan: fine-grained text to image generation with attentional generative adversarial networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1316\u20131324","DOI":"10.1109\/CVPR.2018.00143"},{"issue":"99","key":"14080_CR43","first-page":"1","volume":"PP","author":"Y Yang","year":"2021","unstructured":"Yang Y, Wang L, Xie D, Deng C, Tao D (2021) Multi-sentence auxiliary adversarial networks for fine-grained text-to-image synthesis. IEEE Trans Image Process PP(99):1\u20131","journal-title":"IEEE Trans Image Process"},{"key":"14080_CR44","doi-asserted-by":"crossref","unstructured":"Yin G, Liu B, Sheng L, Yu N, Wang X, Shao J (2019) Semantics disentangling for text-to-image generation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2327\u20132336","DOI":"10.1109\/CVPR.2019.00243"},{"issue":"8","key":"14080_CR45","doi-asserted-by":"publisher","first-page":"1955","DOI":"10.1109\/TMM.2019.2951463","volume":"22","author":"M Yuan","year":"2019","unstructured":"Yuan M, Peng Y (2019) Ckd: cross-task knowledge distillation for text-to-image synthesis. IEEE Trans Multimed 22(8):1955\u20131968","journal-title":"IEEE Trans Multimed"},{"key":"14080_CR46","doi-asserted-by":"crossref","unstructured":"Zhang H, Xu T, Li H, Zhang S, Wang X, Huang X, Metaxas DN (2017) Stackgan: text to photo-realistic image synthesis with stacked generative adversarial networks. In: Proceedings of the IEEE international conference on computer vision, pp 5907\u20135915","DOI":"10.1109\/ICCV.2017.629"},{"issue":"8","key":"14080_CR47","doi-asserted-by":"publisher","first-page":"1947","DOI":"10.1109\/TPAMI.2018.2856256","volume":"41","author":"H Zhang","year":"2018","unstructured":"Zhang H, Xu T, Li H, Zhang S, Wang X, Huang X, Metaxas DN (2018) Stackgan++: realistic image synthesis with stacked generative adversarial networks. IEEE Trans Pattern Anal Mach Intell 41(8):1947\u20131962","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"14080_CR48","doi-asserted-by":"crossref","unstructured":"Zhang H, Koh JY, Baldridge J, Lee H, Yang Y (2021) Cross-modal contrastive learning for text-to-image generation","DOI":"10.1109\/CVPR46437.2021.00089"},{"key":"14080_CR49","doi-asserted-by":"crossref","unstructured":"Zhang Z, Xie Y, Yang L (2018) Photographic text-to-image synthesis with a hierarchically-nested adversarial network. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 6199\u20136208","DOI":"10.1109\/CVPR.2018.00649"},{"issue":"3","key":"14080_CR50","doi-asserted-by":"publisher","first-page":"385","DOI":"10.1007\/s00371-018-1471-4","volume":"35","author":"X Zhou","year":"2019","unstructured":"Zhou X, Wang Y, Zhu Q, Xiao C, Lu X (2019) Ssg: superpixel segmentation and grabcut-based salient object segmentation. Vis Comput 35(3):385\u2013398","journal-title":"Vis Comput"},{"key":"14080_CR51","doi-asserted-by":"crossref","unstructured":"Zhu M, Pan P, Chen W, Yang Y (2019) Dm-gan: dynamic memory generative adversarial networks for text-to-image synthesis. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 5802\u20135810","DOI":"10.1109\/CVPR.2019.00595"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-022-14080-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-022-14080-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-022-14080-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,3,24]],"date-time":"2023-03-24T11:49:05Z","timestamp":1679658545000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-022-14080-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,29]]},"references-count":51,"journal-issue":{"issue":"10","published-print":{"date-parts":[[2023,4]]}},"alternative-id":["14080"],"URL":"https:\/\/doi.org\/10.1007\/s11042-022-14080-8","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"type":"print","value":"1380-7501"},{"type":"electronic","value":"1573-7721"}],"subject":[],"published":{"date-parts":[[2022,10,29]]},"assertion":[{"value":"15 June 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 September 2021","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 January 2022","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 October 2022","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}