{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,5]],"date-time":"2025-06-05T11:30:50Z","timestamp":1749123050670,"version":"3.40.3"},"publisher-location":"Cham","reference-count":28,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783031090363"},{"type":"electronic","value":"9783031090370"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-09037-0_23","type":"book-chapter","created":{"date-parts":[[2022,6,1]],"date-time":"2022-06-01T04:26:16Z","timestamp":1654057576000},"page":"275-286","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Modular StoryGAN with Background and Theme Awareness for Story Visualization"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5781-1088","authenticated-orcid":false,"given":"G\u00e1bor","family":"Sz\u0171cs","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2051-4036","authenticated-orcid":false,"given":"Modafar","family":"Al-Shouha","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,6,2]]},"reference":[{"issue":"2022","key":"23_CR1","first-page":"103382","volume":"82","author":"KK Babu","year":"2021","unstructured":"Babu, K.K., Dubey, S.R.: CDGAN: cyclic discriminative generative adversarial networks for image-to-image transformation. J. Vis. Commun. Image Represent. 82(2022), 103382 (2021)","journal-title":"J. Vis. Commun. Image Represent."},{"key":"23_CR2","unstructured":"Bahdanau, D., Cho, K.H., Bengio, Y.: Neural machine translation by jointly learning to align and translate. In: 3rd International Conference on Learning Representations, ICLR 2015 (2015)"},{"key":"23_CR3","unstructured":"Chung, J., Gulcehre, C., Cho, K., Bengio, Y.: Empirical evaluation of gated recurrent neural networks on sequence modeling. In: NIPS 2014 Workshop on Deep Learning, December 2014"},{"key":"23_CR4","doi-asserted-by":"publisher","unstructured":"Deng, K., Fei, T., Huang, X., Peng, Y.: IRC-GAN: introspective recurrent convolutional GAN for text-to-video generation. In: IJCAI Proceedings of the 28th International Joint Conference on Artificial Intelligence (IJCAI), pp. 2216\u20132222 (2019). https:\/\/doi.org\/10.24963\/ijcai.2019\/307","DOI":"10.24963\/ijcai.2019\/307"},{"key":"23_CR5","unstructured":"Goodfellow, I., et al.: Generative adversarial nets. In: Advances in Neural Information Processing Systems, vol. 27 (2014)"},{"key":"23_CR6","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., Hochreiter, S.: GANs trained by a two time-scale update rule converge to a local Nash equilibrium. In: Advances in Neural Information Processing Systems, pp. 6626\u20136637 (2017)"},{"key":"23_CR7","doi-asserted-by":"crossref","unstructured":"Hu, Y., Luo, C., Chen, Z.: Make it move: controllable image-to-video generation with text descriptions. arXiv preprint arXiv:2112.02815 (2021)","DOI":"10.1109\/CVPR52688.2022.01768"},{"key":"23_CR8","doi-asserted-by":"crossref","unstructured":"Kim, K.M., Heo, M.O., Choi, S.H., Zhang, B.T.: DeepStory: video story qa by deep embedded memory networks. In: Proceedings of the 26th International Joint Conference on Artificial Intelligence, pp. 2016\u20132022 (2017)","DOI":"10.24963\/ijcai.2017\/280"},{"key":"23_CR9","doi-asserted-by":"crossref","unstructured":"Lei, J., Wang, L., Shen, Y., Yu, D., Berg, T., Bansal, M.: MART: memory-augmented recurrent transformer for coherent video paragraph captioning. In: the 58th Annual Meeting of the Association for Computational Linguistics, pp. 2603\u20132614 (2020)","DOI":"10.18653\/v1\/2020.acl-main.233"},{"key":"23_CR10","doi-asserted-by":"publisher","first-page":"102956","DOI":"10.1016\/j.jvcir.2020.102956","volume":"73","author":"C Li","year":"2020","unstructured":"Li, C., Kong, L., Zhou, Z.: Improved-StoryGAN for sequential images visualization. J. Vis. Commun. Image Represent. 73, 102956 (2020)","journal-title":"J. Vis. Commun. Image Represent."},{"key":"23_CR11","doi-asserted-by":"crossref","unstructured":"Li, Y., et al.: StoryGAN: a sequential conditional GAN for story visualization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6329\u20136338 (2019)","DOI":"10.1109\/CVPR.2019.00649"},{"key":"23_CR12","doi-asserted-by":"crossref","unstructured":"Li, Y., Min, M.R., Shen, D., Carlson, D., Carin, L.: Video generation from text. In: 32nd AAAI Conference on Artificial Intelligence, pp. 7065\u20137072. AAAI Press (2018)","DOI":"10.1609\/aaai.v32i1.12233"},{"key":"23_CR13","doi-asserted-by":"crossref","unstructured":"Maharana, A., Bansal, M.: Integrating visuospatial, linguistic, and commonsense structure into story visualization. In: Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, pp. 6772\u20136786 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.543"},{"key":"23_CR14","doi-asserted-by":"crossref","unstructured":"Maharana, A., Hannan, D., Bansal, M.: Improving generation and evaluation of visual stories via semantic consistency. In: Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pp. 2427\u20132442 (2021)","DOI":"10.18653\/v1\/2021.naacl-main.194"},{"key":"23_CR15","doi-asserted-by":"crossref","unstructured":"Marwah, T., Mittal, G., Balasubramanian, V.N.: Attentive semantic video generation using captions. In: IEEE International Conference on Computer Vision, pp. 1426\u20131434 (2017)","DOI":"10.1109\/ICCV.2017.159"},{"key":"23_CR16","doi-asserted-by":"crossref","unstructured":"Qiao, T., Zhang, J., Xu, D., Tao, D.: MirrorGAN: learning text-to-image generation by redescription. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1505\u20131514 (2019)","DOI":"10.1109\/CVPR.2019.00160"},{"key":"23_CR17","unstructured":"Reed, S., Akata, Z., Yan, X., Logeswaran, L., Schiele, B., Lee, H.: Generative adversarial text to image synthesis. In: International Conference on Machine Learning, pp. 1060\u20131069. PMLR (2016)"},{"key":"23_CR18","unstructured":"Sharma, S., Asri, L.E., Schulz, H., Zumer, J.: Relevance of unsupervised metrics in task-oriented dialogue for evaluating natural language generation. arXiv preprint arXiv:1706.09799 (2017)"},{"key":"23_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"18","DOI":"10.1007\/978-3-030-58520-4_2","volume-title":"Computer Vision \u2013 ECCV 2020","author":"Y-Z Song","year":"2020","unstructured":"Song, Y.-Z., Rui Tam, Z., Chen, H.-J., Lu, H.-H., Shuai, H.-H.: Character-preserving coherent story visualization. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12362, pp. 18\u201333. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58520-4_2"},{"key":"23_CR20","doi-asserted-by":"crossref","unstructured":"Tulyakov, S., Liu, M.Y., Yang, X., Kautz, J.: MocoGAN: decomposing motion and content for video generation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1526\u20131535 (2018)","DOI":"10.1109\/CVPR.2018.00165"},{"key":"23_CR21","doi-asserted-by":"publisher","first-page":"251","DOI":"10.1016\/j.asoc.2018.12.019","volume":"76","author":"Z Wen","year":"2019","unstructured":"Wen, Z., Xie, L., Feng, H., Tan, Y.: Robust fusion algorithm based on RBF neural network with TS fuzzy model and its application to infrared flame detection problem. Appl. Soft Comput. 76, 251\u2013264 (2019)","journal-title":"Appl. Soft Comput."},{"key":"23_CR22","doi-asserted-by":"crossref","unstructured":"Xu, T., et al.: AttnGAN: fine-grained text to image generation with attentional generative adversarial networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1316\u20131324 (2018)","DOI":"10.1109\/CVPR.2018.00143"},{"key":"23_CR23","unstructured":"Yu, F., Koltun, V.: Multi-scale context aggregation by dilated convolutions. arXiv preprint arXiv:1511.07122 (2015)"},{"key":"23_CR24","doi-asserted-by":"crossref","unstructured":"Yu, J., Lin, Z., Yang, J., Shen, X., Lu, X., Huang, T.S.: Free-form image inpainting with gated convolution. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4471\u20134480 (2019)","DOI":"10.1109\/ICCV.2019.00457"},{"key":"23_CR25","doi-asserted-by":"crossref","unstructured":"Yu, Y., Tu, Z., Lu, L., Chen, X., Zhan, H., Sun, Z.: Text2Video: automatic video generation based on text scripts. In: Proceedings of the 29th ACM International Conference on Multimedia, pp. 2753\u20132755 (2021)","DOI":"10.1145\/3474085.3478548"},{"key":"23_CR26","doi-asserted-by":"crossref","unstructured":"Zeng, G., Li, Z., Zhang, Y.: PororoGAN: an improved story visualization model on Pororo-SV dataset. In: Proceedings of the 2019 3rd International Conference on Computer Science and Artificial Intelligence, pp. 155\u2013159 (2019)","DOI":"10.1145\/3374587.3374649"},{"key":"23_CR27","doi-asserted-by":"publisher","unstructured":"Zhang, H., et al.: StackGAN: text to photo-realistic image synthesis with stacked generative adversarial networks. In: IEEE International Conference on Computer Vision (ICCV), vol. 1, pp. 5908\u20135916 (2017). https:\/\/doi.org\/10.1109\/ICCV.2017.629","DOI":"10.1109\/ICCV.2017.629"},{"key":"23_CR28","doi-asserted-by":"crossref","unstructured":"Zhu, M., Pan, P., Chen, W., Yang, Y.: DM-GAN: dynamic memory generative adversarial networks for text-to-image synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5802\u20135810 (2019)","DOI":"10.1109\/CVPR.2019.00595"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-09037-0_23","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,27]],"date-time":"2023-06-27T15:27:39Z","timestamp":1687879659000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-09037-0_23"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031090363","9783031090370"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-09037-0_23","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"2 June 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICPRAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Pattern Recognition and Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Paris","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"France","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 June 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3 June 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icprai2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/icprai2022.sciencesconf.org\/1.6.If","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}