{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,27]],"date-time":"2025-07-27T07:45:13Z","timestamp":1753602313938,"version":"3.40.3"},"publisher-location":"Cham","reference-count":30,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031585340"},{"type":"electronic","value":"9783031585357"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-58535-7_12","type":"book-chapter","created":{"date-parts":[[2024,7,2]],"date-time":"2024-07-02T17:01:50Z","timestamp":1719939710000},"page":"139-151","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["An Improved AttnGAN Model for\u00a0Text-to-Image Synthesis"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2307-1827","authenticated-orcid":false,"given":"Remya","family":"Gopalakrishnan","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2316-8450","authenticated-orcid":false,"given":"Naveen","family":"Sambagni","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6738-7786","authenticated-orcid":false,"given":"P. V.","family":"Sudeep","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,7,3]]},"reference":[{"issue":"2","key":"12_CR1","doi-asserted-by":"publisher","first-page":"157","DOI":"10.1109\/72.279181","volume":"5","author":"Y Bengio","year":"1994","unstructured":"Bengio, Y., Simard, P., Frasconi, P.: Learning long-term dependencies with gradient descent is difficult. IEEE Trans. Neural Netw. 5(2), 157\u2013166 (1994)","journal-title":"IEEE Trans. Neural Netw."},{"issue":"11","key":"12_CR2","doi-asserted-by":"publisher","first-page":"2673","DOI":"10.1109\/78.650093","volume":"45","author":"M Schuster","year":"1997","unstructured":"Schuster, M., Paliwal, K.K.: Bidirectional recurrent neural networks. IEEE Trans. Signal Process. 45(11), 2673\u20132681 (1997)","journal-title":"IEEE Trans. Signal Process."},{"key":"12_CR3","unstructured":"Goodfellow, I., et al.: Generative adversarial nets. In: Advances in Neural Information Processing Systems, vol. 27, pp. 2672\u20132680 (2014)"},{"issue":"3","key":"12_CR4","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., et al.: ImageNet large scale visual recognition challenge. Int. J. Comput. Vis. 115(3), 211\u2013252 (2015)","journal-title":"Int. J. Comput. Vis."},{"key":"12_CR5","unstructured":"Salimans, T., Goodfellow, I., Zaremba, W., Cheung, V., Radford, A., Chen, X.: Improved techniques for training GANs. In: Advances in Neural Information Processing Systems, vol. 29 (2016)"},{"key":"12_CR6","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., Hochreiter, S.: GANs trained by a two time-scale update rule converge to a local nash equilibrium. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"12_CR7","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"issue":"8","key":"12_CR8","doi-asserted-by":"publisher","first-page":"1947","DOI":"10.1109\/TPAMI.2018.2856256","volume":"41","author":"H Zhang","year":"2018","unstructured":"Zhang, H., et al.: StackGAN++: realistic image synthesis with stacked generative adversarial networks. IEEE Trans. Pattern Anal. Mach. Intell. 41(8), 1947\u20131962 (2018)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"12_CR9","unstructured":"Li, B., Qi, X., Lukasiewicz, T., Torr, P.: Controllable text-to-image generation. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"issue":"10","key":"12_CR10","doi-asserted-by":"publisher","first-page":"4291","DOI":"10.1109\/TNNLS.2020.3019893","volume":"32","author":"A Galassi","year":"2020","unstructured":"Galassi, A., Marco Lippi, M., Torroni, P.: Attention in natural language processing. IEEE Trans. Neural Netw. Learn. Syst. 32(10), 4291\u20134308 (2020)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"12_CR11","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2021.104284","volume":"115","author":"S Naveen","year":"2021","unstructured":"Naveen, S., Kiran, M.S.R., Indupriya, M., Manikanta, T.V., Sudeep, P.V.: Transformer models for enhancing AttnGAN based text to image generation. Image Vis. Comput. 115, 104284 (2021)","journal-title":"Image Vis. Comput."},{"key":"12_CR12","doi-asserted-by":"publisher","first-page":"57","DOI":"10.1016\/j.neunet.2021.01.023","volume":"138","author":"D Peng","year":"2021","unstructured":"Peng, D., Yang, W., Liu, C., L\u00fc, S.: SAM-GAN: self-attention supporting multi-stage generative adversarial networks for text-to-image synthesis. Neural Netw. 138, 57\u201367 (2021)","journal-title":"Neural Netw."},{"key":"12_CR13","doi-asserted-by":"publisher","first-page":"36018","DOI":"10.1109\/ACCESS.2022.3163856","volume":"10","author":"F Andayani","year":"2022","unstructured":"Andayani, F., Theng, L.B., Tsun, M.T., Chua, C.: Hybrid LSTM-transformer model for emotion recognition from speech audio files. IEEE Access 10, 36018\u201336027 (2022)","journal-title":"IEEE Access"},{"key":"12_CR14","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2021.103329","volume":"215","author":"A Borji","year":"2022","unstructured":"Borji, A.: Pros and cons of GAN evaluation measures: new developments. Comput. Vis. Image Underst. 215, 103329 (2022)","journal-title":"Comput. Vis. Image Underst."},{"key":"12_CR15","doi-asserted-by":"publisher","first-page":"182","DOI":"10.1016\/j.neucom.2021.12.005","volume":"473","author":"Z Zhang","year":"2022","unstructured":"Zhang, Z., Schomaker, L.: DiverGAN: an efficient and effective single-stage framework for diverse text-to-image generation. Neurocomputing 473, 182\u2013198 (2022)","journal-title":"Neurocomputing"},{"key":"12_CR16","doi-asserted-by":"publisher","DOI":"10.1016\/j.dsp.2023.104105","volume":"140","author":"D Jin","year":"2023","unstructured":"Jin, D., Li, G., Yu, Q., Yu, L., Cui, J., Qi, M.: GMF-GAN: gradual multi-granularity semantic fusion GAN for text-to-image synthesis. Digit. Signal Process. 140, 104105 (2023)","journal-title":"Digit. Signal Process."},{"key":"12_CR17","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"12_CR18","unstructured":"Reed, S., Akata, Z., Yan, X., Logeswaran, L., Schiele, B., Lee, H.: Generative adversarial text to image synthesis. In: International Conference on Machine Learning, pp. 1060\u20131069. PMLR (2016)"},{"key":"12_CR19","doi-asserted-by":"crossref","unstructured":"Szegedy, C., Vanhoucke, V., Ioffe, S., Shlens, J., Wojna, Z.: Rethinking the inception architecture for computer vision. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2818\u20132826 (2016)","DOI":"10.1109\/CVPR.2016.308"},{"key":"12_CR20","doi-asserted-by":"crossref","unstructured":"Zhang, H., et al.: StackGAN: text to photo-realistic image synthesis with stacked generative adversarial networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 5907\u20135915 (2017)","DOI":"10.1109\/ICCV.2017.629"},{"key":"12_CR21","doi-asserted-by":"crossref","unstructured":"Xu, T., et al.: AttnGAN: fine-grained text to image generation with attentional generative adversarial networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1316\u20131324 (2018)","DOI":"10.1109\/CVPR.2018.00143"},{"key":"12_CR22","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Xie, Y., Yang, L.: Photographic text-to-image synthesis with a hierarchically-nested adversarial network. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6199\u20136208 (2018)","DOI":"10.1109\/CVPR.2018.00649"},{"key":"12_CR23","doi-asserted-by":"crossref","unstructured":"Qiao, T., Zhang, J., Xu, D., Tao, D.: MirrorGAN: learning text-to-image generation by redescription. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1505\u20131514 (2019)","DOI":"10.1109\/CVPR.2019.00160"},{"key":"12_CR24","doi-asserted-by":"crossref","unstructured":"Tan, H., Liu, X., Li, X., Zhang, Y., Yin, B.: Semantics-enhanced adversarial nets for text-to-image synthesis. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10501\u201310510 (2019)","DOI":"10.1109\/ICCV.2019.01060"},{"key":"12_CR25","doi-asserted-by":"crossref","unstructured":"Yin, G., Liu, B., Sheng, L., Yu, N., Wang, X., Shao, J.: Semantics disentangling for text-to-image generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2327\u20132336 (2019)","DOI":"10.1109\/CVPR.2019.00243"},{"key":"12_CR26","doi-asserted-by":"crossref","unstructured":"Zhu, M., Pan, P., Chen, W., Yang, Y.: DM-GAN: dynamic memory generative adversarial networks for text-to-image synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5802\u20135810 (2019)","DOI":"10.1109\/CVPR.2019.00595"},{"key":"12_CR27","doi-asserted-by":"crossref","unstructured":"Zhang, H., Koh, J.Y., Baldridge, J., Lee, H., Yang, Y.: Cross-modal contrastive learning for text-to-image generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 833\u2013842 (2021)","DOI":"10.1109\/CVPR46437.2021.00089"},{"key":"12_CR28","doi-asserted-by":"crossref","unstructured":"Tao, M., Tang, H., Wu, F., Jing, X.Y., Bao, B.K., Xu, C.: DF-GAN: a simple and effective baseline for text-to-image synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16515\u201316525 (2022)","DOI":"10.1109\/CVPR52688.2022.01602"},{"key":"12_CR29","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. In: arXiv preprint arXiv:1412.6980 (2014)"},{"key":"12_CR30","unstructured":"Ye, H., Yang, X., Takac, M., Sunderraman, R., Ji, S.: Improving text-to-image synthesis using contrastive learning. In: arXiv preprint arXiv:2107.02423 (2021)"}],"container-title":["Communications in Computer and Information Science","Computer Vision and Image Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-58535-7_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,2]],"date-time":"2024-07-02T17:05:53Z","timestamp":1719939953000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-58535-7_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031585340","9783031585357"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-58535-7_12","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"3 July 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"CVIP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Computer Vision and Image Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Jammu","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"India","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3 November 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 November 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"cvip2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/iitjammu.ac.in\/cvip2023\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Online CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"461","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"140","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"30% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}