{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T14:28:08Z","timestamp":1742999288766,"version":"3.40.3"},"publisher-location":"Cham","reference-count":28,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031783821"},{"type":"electronic","value":"9783031783838"}],"license":[{"start":{"date-parts":[[2024,12,2]],"date-time":"2024-12-02T00:00:00Z","timestamp":1733097600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,2]],"date-time":"2024-12-02T00:00:00Z","timestamp":1733097600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-78383-8_24","type":"book-chapter","created":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T21:54:13Z","timestamp":1733090053000},"page":"359-373","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Fine-grained Text to Image Synthesis"],"prefix":"10.1007","author":[{"given":"Xu","family":"Ouyang","sequence":"first","affiliation":[]},{"given":"Ying","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Kaiyue","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Gady","family":"Agam","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,12,2]]},"reference":[{"key":"24_CR1","unstructured":"Goodfellow, I.J., Pouget-Abadie, J., Mirza, M., Xu, B., Warde-Farley, D., Ozair, S., Courville, A., Bengio, Y.: Generative adversarial nets. In: Proceedings of the 27th International Conference on Neural Information Processing Systems (NIPS\u201914), pp. 2672\u20132680. MIT Press, Cambridge, MA, USA (2014)"},{"key":"24_CR2","unstructured":"Reed, S., Akata, Z., Yan, X., Logeswaran, L., Schiele, B., Lee, H.: Generative Adversarial Text to Image Synthesis. In: Balcan, M.F., Weinberger, K.Q. (eds.) Proceedings of The 33rd International Conference on Machine Learning, vol. 48, pp. 1060\u20131069. PMLR, New York, New York, USA (2016)"},{"key":"24_CR3","doi-asserted-by":"crossref","unstructured":"Ouyang, X., Zhang, X., Ma, D., Agam, G.: Generating Image Sequence from Description with LSTM Conditional GAN. In: 2018 24th International Conference on Pattern Recognition (ICPR), pp. 2456\u20132461. IEEE, Beijing, China (2018)","DOI":"10.1109\/ICPR.2018.8545419"},{"key":"24_CR4","doi-asserted-by":"crossref","unstructured":"Xu, T., Zhang, P., Huang, Q., Zhang, H., Gan, Z., Huang, X., He, X.: AttnGAN: Fine-Grained Text to Image Generation with Attentional Generative Adversarial Networks. In: 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 1316\u20131324. IEEE, Salt Lake City, UT, USA (2018)","DOI":"10.1109\/CVPR.2018.00143"},{"key":"24_CR5","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Chen, H., Zhang, W., Sun, Z., He, X., Fan, Y.: Towards Language-Free Training for Text-to-Image Generation. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 17886\u201317896. IEEE, New Orleans, LA, USA (2022)","DOI":"10.1109\/CVPR52688.2022.01738"},{"key":"24_CR6","doi-asserted-by":"crossref","unstructured":"Karras, T., Laine, S., Aittala, M., Hellsten, J., Lehtinen, J., Aila, T.: Analyzing and Improving the Image Quality of StyleGAN. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 8107\u20138116. IEEE (2020)","DOI":"10.1109\/CVPR42600.2020.00813"},{"key":"24_CR7","doi-asserted-by":"crossref","unstructured":"Ye, S., Wang, H., Tan, M., Liu, F.: Recurrent Affine Transformation for Text-to-Image Synthesis. In: IEEE Transactions on Multimedia, vol. 26, pp. 462\u2013473. IEEE (2024)","DOI":"10.1109\/TMM.2023.3266607"},{"key":"24_CR8","unstructured":"Ramesh, A., Pavlov, M., Goh, G., Gray, S., Voss, C., Radford, A., Chen, M., Sutskever, I.: Zero-Shot Text-to-Image Generation. In: Proceedings of the 38th International Conference on Machine Learning, Proceedings of Machine Learning Research, vol. 139, pp. 8821\u20138831. PMLR (2021)"},{"key":"24_CR9","unstructured":"Ding, M., Yang, Z., Hong, W., Zheng, W., Zhou, C., Yin, D., Lin, J., et al.: Cogview: Mastering text-to-image generation via transformers. In: Advances in Neural Information Processing Systems, vol. 34, pp. 19822\u201319835. (2021)"},{"key":"24_CR10","unstructured":"van den Oord, A., Vinyals, O., Kavukcuoglu, K.: Neural discrete representation learning. In: Proceedings of the 31st International Conference on Neural Information Processing Systems (NIPS\u201917), pp. 6309\u20136318. Curran Associates Inc., Red Hook, NY, USA (2017)"},{"key":"24_CR11","unstructured":"Yu, J., Xu, Y., Koh, J.Y., Luong, T., Baid, G., Wang, Z., Vasudevan, V., et al.: Scaling autoregressive models for content-rich text-to-image generation. arXiv preprint arXiv:2206.10789, vol. 2, no. 3, p. 5 (2022)"},{"key":"24_CR12","unstructured":"Yu, J., Li, X., Koh, J.Y., Zhang, H., Pang, R., Qin, J., Ku, A., Xu, Y., Baldridge, J., Wu, Y.: Vector-quantized image modeling with improved VQGAN. arXiv preprint arXiv:2110.04627 (2021)"},{"key":"24_CR13","unstructured":"Nichol, A., Dhariwal, P., Ramesh, A., Shyam, P., Mishkin, P., McGrew, B., Sutskever, I., Chen, M.: Glide: Towards photorealistic image generation and editing with text-guided diffusion models. arXiv preprint arXiv:2112.10741 (2021)"},{"key":"24_CR14","doi-asserted-by":"crossref","unstructured":"Gu, S., Liu, Z., Ye, X., Lin, T., Wang, M., Cui, S., Liu, H., Liu, Y., Sun, C., Du, J., Hu, H.: Vector Quantized Diffusion Model for Text-to-Image Synthesis. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10686\u201310696. IEEE, New Orleans, LA, USA (2022)","DOI":"10.1109\/CVPR52688.2022.01043"},{"key":"24_CR15","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., Chen, M.: Hierarchical text-conditional image generation with CLIP latents. arXiv preprint arXiv:2204.06125, vol. 1, no. 2, p. 3 (2022)"},{"key":"24_CR16","unstructured":"Saharia, C., Chan, W., Saxena, S., Lit, L., Whang, J., Denton, E., Seyed Ghasemipour, S.K., Karagol Ayan, B., Mahdavi, S.S., Gontijo-Lopes, R., Salimans, T., Ho, J., Fleet, D.J., Norouzi, M.: Photorealistic text-to-image diffusion models with deep language understanding. In: Proceedings of the 36th International Conference on Neural Information Processing Systems (NIPS \u201922), Article 2643, pp. 36479\u201336494. Curran Associates Inc., Red Hook, NY, USA (2024)"},{"key":"24_CR17","doi-asserted-by":"crossref","unstructured":"Wang, X., Zhang, H., Huang, W., Scott, M.R.: Cross-Batch Memory for Embedding Learning. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 6387\u20136396. IEEE, Seattle, WA, USA (2020)","DOI":"10.1109\/CVPR42600.2020.00642"},{"key":"24_CR18","unstructured":"Salimans, T., Goodfellow, I., Zaremba, W., Cheung, V., Radford, A., Chen, X.: Improved techniques for training GANs. In: Proceedings of the 30th International Conference on Neural Information Processing Systems (NIPS\u201916), pp. 2234\u20132242. Curran Associates Inc., Red Hook, NY, USA (2016)"},{"key":"24_CR19","unstructured":"Chen, X., Duan, Y., Houthooft, R., Schulman, J., Sutskever, I., Abbeel, P.: InfoGAN: interpretable representation learning by information maximizing generative adversarial nets. In: Proceedings of the 30th International Conference on Neural Information Processing Systems (NIPS\u201916), pp. 2180\u20132188. Curran Associates Inc., Red Hook, NY, USA (2016)"},{"key":"24_CR20","unstructured":"Nguyen, A., Dosovitskiy, A., Yosinski, J., Brox, T., Clune, J.: Synthesizing the preferred inputs for neurons in neural networks via deep generator networks. In: Proceedings of the 30th International Conference on Neural Information Processing Systems (NIPS\u201916), pp. 3395\u20133403. Curran Associates Inc., Red Hook, NY, USA (2016)"},{"key":"24_CR21","unstructured":"Odena, A., Olah, C., Shlens, J.: Conditional image synthesis with auxiliary classifier GANs. In: Proceedings of the 34th International Conference on Machine Learning (ICML\u201917), vol. 70, pp. 2642\u20132651. JMLR.org (2017)"},{"key":"24_CR22","unstructured":"Ye, H., Yang, X., Takac, M., Sunderraman, R., Ji, S.: Improving text-to-image synthesis using contrastive learning. arXiv preprint arXiv:2107.02423 (2021)"},{"key":"24_CR23","unstructured":"Dash, A., Gamboa, J.C.B., Ahmed, S., Liwicki, M., Afzal, M.Z.: Tac-gan-text conditioned auxiliary classifier generative adversarial network. arXiv preprint arXiv:1703.06412 (2017)"},{"key":"24_CR24","doi-asserted-by":"crossref","unstructured":"Zhang, H., Koh, J.Y., Baldridge, J., Lee, H., Yang, Y.: Cross-Modal Contrastive Learning for Text-to-Image Generation. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 833\u2013842. IEEE, Nashville, TN, USA (2021)","DOI":"10.1109\/CVPR46437.2021.00089"},{"key":"24_CR25","doi-asserted-by":"crossref","unstructured":"Yin, G., Liu, B., Sheng, L., Yu, N., Wang, X., Shao, J.: Semantics Disentangling for Text-To-Image Generation. In: 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2322\u20132331. IEEE, Long Beach, CA, USA (2019)","DOI":"10.1109\/CVPR.2019.00243"},{"key":"24_CR26","unstructured":"Salimans, T., Goodfellow, I.J., Zaremba, W., Cheung, V., Radford, A., Chen, X.: Improved Techniques for Training GANs. In: Advances in Neural Information Processing Systems 29: Annual Conference on Neural Information Processing Systems 2016, pp. 2226\u20132234. (2016)"},{"issue":"3","key":"24_CR27","doi-asserted-by":"publisher","first-page":"450","DOI":"10.1016\/0047-259X(82)90077-X","volume":"12","author":"DC Dowson","year":"1982","unstructured":"Dowson, D.C., Landau, B.V.: The Fr\u00e9chet distance between multivariate normal distributions. J. Multivar. Anal. 12(3), 450\u2013455 (1982)","journal-title":"J. Multivar. Anal."},{"key":"24_CR28","unstructured":"Barratt, S., Sharma, R.: A Note on the Inception Score. arXiv preprint arXiv:1801.01973. (2018)"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-78383-8_24","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T23:41:26Z","timestamp":1733096486000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-78383-8_24"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,2]]},"ISBN":["9783031783821","9783031783838"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-78383-8_24","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,12,2]]},"assertion":[{"value":"2 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICPR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Pattern Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Kolkata","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"India","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 December 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 December 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icpr2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icpr2024.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}