{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,23]],"date-time":"2026-01-23T15:33:12Z","timestamp":1769182392929,"version":"3.49.0"},"publisher-location":"Singapore","reference-count":33,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819555666","type":"print"},{"value":"9789819555673","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-5567-3_27","type":"book-chapter","created":{"date-parts":[[2026,1,22]],"date-time":"2026-01-22T21:13:09Z","timestamp":1769116389000},"page":"388-402","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Absolute Story: Visual Storytelling with\u00a0Consistent Subject and\u00a0Style"],"prefix":"10.1007","author":[{"given":"Lipeng","family":"Wang","sequence":"first","affiliation":[]},{"given":"Hongxing","family":"Fan","sequence":"additional","affiliation":[]},{"given":"Zehuan","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Lu","family":"Sheng","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,1,23]]},"reference":[{"key":"27_CR1","unstructured":"Bai, J., et al.: Qwen-VL: a versatile vision-language model for understanding, localization, text reading, and beyond. arXiv preprint arXiv:2308.12966 (2023)"},{"key":"27_CR2","unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat GANs on image synthesis. In: Advances in Neural Information Processing Systems (2021)"},{"key":"27_CR3","unstructured":"Gong, Y., et al.: TaleCrafter: interactive story visualization with multiple characters. arXiv preprint arXiv:2305.18247 (2023)"},{"key":"27_CR4","unstructured":"Goodfellow, I., et al.: Generative adversarial nets. In: NeurIPS (2014)"},{"key":"27_CR5","unstructured":"Guo, Y., et al.: AnimateDiff: animate your personalized text-to-image diffusion models without specific tuning. arXiv preprint arXiv:2307.04725 (2023)"},{"key":"27_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"610","DOI":"10.1007\/978-3-030-01237-3_37","volume-title":"Computer Vision \u2013 ECCV 2018","author":"T Gupta","year":"2018","unstructured":"Gupta, T., Schwenk, D., Farhadi, A., Hoiem, D., Kembhavi, A.: Imagine this! Scripts to compositions to videos. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11212, pp. 610\u2013626. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01237-3_37"},{"key":"27_CR7","doi-asserted-by":"crossref","unstructured":"Hu, L.: Animate anyone: consistent and controllable image-to-video synthesis for character animation. In: CVPR, pp. 8153\u20138163 (2024)","DOI":"10.1109\/CVPR52733.2024.00779"},{"key":"27_CR8","unstructured":"Huang, Z., Fan, H., Wang, L., Sheng, L.: From parts to whole: a unified reference framework for controllable human image generation. arXiv preprint arXiv:2404.15267 (2024)"},{"key":"27_CR9","doi-asserted-by":"crossref","unstructured":"Kim, K.M., Heo, M.O., Choi, S.H., Zhang, B.T.: DeepStory: video story QA by deep embedded memory networks. arXiv preprint arXiv:1707.00836 (2017)","DOI":"10.24963\/ijcai.2017\/280"},{"key":"27_CR10","doi-asserted-by":"crossref","unstructured":"Kirillov, A., et\u00a0al.: Segment anything. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"27_CR11","doi-asserted-by":"crossref","unstructured":"Li, B.: Word-level fine-grained story visualization. In: ECCV, pp. 347\u2013362 (2022)","DOI":"10.1007\/978-3-031-20059-5_20"},{"key":"27_CR12","doi-asserted-by":"crossref","unstructured":"Li, Y., et al.: StoryGAN: a sequential conditional GAN for story visualization. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00649"},{"key":"27_CR13","doi-asserted-by":"crossref","unstructured":"Liu, C., Wu, H., Zhong, Y., Zhang, X., Wang, Y., Xie, W.: Intelligent Grimm-open-ended visual storytelling via latent diffusion models. In: CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.00592"},{"key":"27_CR14","doi-asserted-by":"crossref","unstructured":"Liu, S., et\u00a0al.: Grounding DINO: marrying DINO with grounded pre-training for open-set object detection. arXiv preprint arXiv:2303.05499 (2023)","DOI":"10.1007\/978-3-031-72970-6_3"},{"key":"27_CR15","doi-asserted-by":"crossref","unstructured":"Maharana, A., Hannan, D., Bansal, M.: Improving generation and evaluation of visual stories via semantic consistency. arXiv preprint arXiv:2105.10026 (2021)","DOI":"10.18653\/v1\/2021.naacl-main.194"},{"key":"27_CR16","doi-asserted-by":"crossref","unstructured":"Maharana, A., Hannan, D., Bansal, M.: StoryDALL-E: adapting pretrained text-to-image transformers for story continuation. arXiv preprint arXiv:2209.06192 (2022)","DOI":"10.1007\/978-3-031-19836-6_5"},{"key":"27_CR17","unstructured":"Nichol, A.Q., et al.: GLIDE: towards photorealistic image generation and editing with text-guided diffusion models. In: ICML (2022)"},{"key":"27_CR18","unstructured":"Pan, X., Qin, P., Li, Y., Xue, H., Chen, W.: Synthesizing coherent story with auto-regressive latent diffusion models. arXiv preprint arXiv:2211.10950 (2022)"},{"key":"27_CR19","doi-asserted-by":"crossref","unstructured":"Rahman, T., Lee, H.Y., Ren, J., Tulyakov, S., Mahajan, S., Sigal, L.: Make-a-story: visual memory conditioned consistent story generation. In: CVPR, pp. 2493\u20132502 (2023)","DOI":"10.1109\/CVPR52729.2023.00246"},{"key":"27_CR20","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., Chen, M.: Hierarchical text-conditional image generation with CLIP Latents. arXiv preprint arXiv:2204.06125 (2022)"},{"key":"27_CR21","unstructured":"Ramesh, A., et al.: Zero-shot text-to-image generation. In: ICML (2021)"},{"key":"27_CR22","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"27_CR23","unstructured":"Saharia, C., et\u00a0al.: Photorealistic text-to-image diffusion models with deep language understanding. arXiv preprint arXiv:2205.11487 (2022)"},{"key":"27_CR24","unstructured":"Shen, F., et al.: Boosting consistency in story visualization with rich-contextual conditional diffusion models. arXiv preprint arXiv:2407.02482 (2024)"},{"key":"27_CR25","unstructured":"Song, J., Meng, C., Ermon, S.: Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502 (2020)"},{"key":"27_CR26","doi-asserted-by":"crossref","unstructured":"Song, Y.Z., Rui\u00a0Tam, Z., Chen, H.J., Lu, H.H., Shuai, H.H.: Character-preserving coherent story visualization. In: ECCV (2020)","DOI":"10.1007\/978-3-030-58520-4_2"},{"key":"27_CR27","unstructured":"Su, S., Guo, L., Gao, L., Shen, H.T., Song, J.: Make-a-storyboard: a general framework for storyboard with disentangled and merged control. arXiv preprint arXiv:2312.07549 (2023)"},{"key":"27_CR28","doi-asserted-by":"crossref","unstructured":"Tao, M., Bao, B.K., Tang, H., Wang, Y., Xu, C.: StoryImager: a unified and efficient framework for coherent story visualization and completion. In: ECCV (2024)","DOI":"10.1007\/978-3-031-72992-8_27"},{"key":"27_CR29","doi-asserted-by":"crossref","unstructured":"Tewel, Y., et al.: Training-free consistent text-to-image generation. ACM Trans. Graph. (TOG) (2024)","DOI":"10.1145\/3658157"},{"key":"27_CR30","unstructured":"Yang, S., et al.: Seed-Story: multimodal long story generation with large language model. arXiv preprint arXiv:2407.08683 (2024)"},{"key":"27_CR31","unstructured":"Ye, H., Zhang, J., Liu, S., Han, X., Yang, W.: IP-Adapter: text compatible image prompt adapter for text-to-image diffusion models. arXiv preprint arXiv:2308.06721 (2023)"},{"key":"27_CR32","doi-asserted-by":"crossref","unstructured":"Zheng, S., Fu, Y.: ContextualStory: consistent visual storytelling with spatially-enhanced and storyline context (2024)","DOI":"10.1609\/aaai.v39i10.33153"},{"key":"27_CR33","unstructured":"Zhou, Y., Zhou, D., Cheng, M.M., Feng, J., Hou, Q.: StoryDiffusion: consistent self-attention for long-range image and video generation. In: NeurIPS (2024)"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-5567-3_27","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,22]],"date-time":"2026-01-22T21:13:15Z","timestamp":1769116395000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-5567-3_27"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819555666","9789819555673"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-5567-3_27","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"23 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Shanghai","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2025.prcv.cn\/index.asp","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}