{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T16:14:35Z","timestamp":1742919275137,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":40,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819785049"},{"type":"electronic","value":"9789819785056"}],"license":[{"start":{"date-parts":[[2024,11,7]],"date-time":"2024-11-07T00:00:00Z","timestamp":1730937600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,7]],"date-time":"2024-11-07T00:00:00Z","timestamp":1730937600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-97-8505-6_28","type":"book-chapter","created":{"date-parts":[[2024,11,6]],"date-time":"2024-11-06T22:03:04Z","timestamp":1730930584000},"page":"393-406","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["GAN-Diffusion Relay Model: Advancing Semantic Image Synthesis"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-9029-0378","authenticated-orcid":false,"given":"Jinyin","family":"Jia","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0971-6593","authenticated-orcid":false,"given":"Jun","family":"Yang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3664-2323","authenticated-orcid":false,"given":"Anfei","family":"Fan","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0004-4199-6147","authenticated-orcid":false,"given":"Junfan","family":"Chen","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0003-6946-9181","authenticated-orcid":false,"given":"Peng","family":"Cao","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5934-5837","authenticated-orcid":false,"given":"Chiyu","family":"Zhang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0003-1988-1477","authenticated-orcid":false,"given":"Wei","family":"Li","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,7]]},"reference":[{"key":"28_CR1","unstructured":"Balaji, Y., Nah, S., Huang, X., Vahdat, A., Song, J., Kreis, K., Liu, M.: Ediffi: Text-to-image diffusion models with an ensemble of expert denoisers. arxiv 2022. arXiv preprint arXiv:2211.01324 (2022)"},{"key":"28_CR2","doi-asserted-by":"crossref","unstructured":"Chen, Q., Koltun, V.: Photographic image synthesis with cascaded refinement networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1511\u20131520 (2017)","DOI":"10.1109\/ICCV.2017.168"},{"key":"28_CR3","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: Imagenet: A large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pp. 248\u2013255. IEEE (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"28_CR4","first-page":"8780","volume":"34","author":"P Dhariwal","year":"2021","unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat gans on image synthesis. Adv. Neural. Inf. Process. Syst. 34, 8780\u20138794 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"28_CR5","unstructured":"Eastwood, C., Williams, C.K.: A framework for the quantitative evaluation of disentangled representations. In: International Conference on Learning Representations (2018)"},{"key":"28_CR6","doi-asserted-by":"crossref","unstructured":"Goodfellow, I.J., Pouget-Abadie, J., Mirza, M., Xu, B., Warde-Farley, D., Ozair, S., Courville, A.C., Bengio, Y.: Generative adversarial networks. Comm. ACM 63, 139\u2013144 (2014), https:\/\/api.semanticscholar.org\/CorpusID:1033682","DOI":"10.1145\/3422622"},{"key":"28_CR7","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., Hochreiter, S.: Gans trained by a two time-scale update rule converge to a local nash equilibrium. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"28_CR8","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. Adv. Neural. Inf. Process. Syst. 33, 6840\u20136851 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"47","key":"28_CR9","first-page":"1","volume":"23","author":"J Ho","year":"2022","unstructured":"Ho, J., Saharia, C., Chan, W., Fleet, D.J., Norouzi, M., Salimans, T.: Cascaded diffusion models for high fidelity image generation. J. Mach. Learn. Res. 23(47), 1\u201333 (2022)","journal-title":"J. Mach. Learn. Res."},{"key":"28_CR10","unstructured":"Hoogeboom, E., Heek, J., Salimans, T.: simple diffusion: End-to-end diffusion for high resolution images. In: International Conference on Machine Learning, pp. 13213\u201313232. PMLR (2023)"},{"key":"28_CR11","unstructured":"IsolaP, Z., Zhou, T., et\u00a0al.: Image to imagetranslation withconditionaladversarialnetworks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. Hawaii, USA 1125, 1134 (2017)"},{"key":"28_CR12","unstructured":"Karras, T., Aila, T., Laine, S., Lehtinen, J.: Progressive growing of gans for improved quality, stability, and variation. arXiv preprint arXiv:1710.10196 (2017)"},{"key":"28_CR13","first-page":"26565","volume":"35","author":"T Karras","year":"2022","unstructured":"Karras, T., Aittala, M., Aila, T., Laine, S.: Elucidating the design space of diffusion-based generative models. Adv. Neural. Inf. Process. Syst. 35, 26565\u201326577 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"28_CR14","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1007\/s00591-010-0080-8","volume":"58","author":"F Klinker","year":"2011","unstructured":"Klinker, F.: Exponential moving average versus moving exponential average. Math. Semesterber. 58, 97\u2013107 (2011)","journal-title":"Math. Semesterber."},{"key":"28_CR15","unstructured":"Liu, X., Yin, G., Shao, J., Wang, X., et\u00a0al.: Learning to predict layout-to-image conditional convolutions for semantic image synthesis. Adv. Neural Inf. Process. Syst. 32 (2019)"},{"key":"28_CR16","doi-asserted-by":"crossref","unstructured":"Lv, Z., Li, X., Niu, Z., Cao, B., Zuo, W.: Semantic-shape adaptive feature modulation for semantic image synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11214\u201311223 (2022)","DOI":"10.1109\/CVPR52688.2022.01093"},{"key":"28_CR17","doi-asserted-by":"crossref","unstructured":"Park, T., Liu, M.Y., Wang, T.C., Zhu, J.Y.: Semantic image synthesis with spatially-adaptive normalization. In: 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2332\u20132341 (2019), https:\/\/api.semanticscholar.org\/CorpusID:81981856","DOI":"10.1109\/CVPR.2019.00244"},{"key":"28_CR18","unstructured":"Ramesh, A., Pavlov, M., Goh, G., Gray, S., Voss, C., Radford, A., Chen, M., Sutskever, I.: Zero-shot text-to-image generation. In: International Conference on Machine Learning, pp. 8821\u20138831. Pmlr (2021)"},{"key":"28_CR19","unstructured":"Rissanen, S., Heinonen, M., Solin, A.: Generative modelling with inverse heat dissipation. arXiv preprint arXiv:2206.13397 (2022)"},{"key":"28_CR20","doi-asserted-by":"crossref","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-net: Convolutional networks for biomedical image segmentation. In: Medical Image Computing and Computer-assisted Intervention\u2013MICCAI 2015: 18th International Conference, Munich, Germany, October 5-9, 2015, proceedings, part III 18, pp. 234\u2013241. Springer (2015)","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"28_CR21","doi-asserted-by":"crossref","unstructured":"Ruiz, N., Li, Y., Jampani, V., Pritch, Y., Rubinstein, M., Aberman, K.: Dreambooth: Fine tuning text-to-image diffusion models for subject-driven generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22500\u201322510 (2023)","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"28_CR22","first-page":"36479","volume":"35","author":"C Saharia","year":"2022","unstructured":"Saharia, C., Chan, W., Saxena, S., Li, L., Whang, J., Denton, E.L., Ghasemipour, K., Gontijo Lopes, R., Karagol Ayan, B., Salimans, T., et al.: Photorealistic text-to-image diffusion models with deep language understanding. Adv. Neural. Inf. Process. Syst. 35, 36479\u201336494 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"28_CR23","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)"},{"key":"28_CR24","unstructured":"Song, J., Meng, C., Ermon, S.: Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502 (2020)"},{"key":"28_CR25","unstructured":"Song, Y., Sohl-Dickstein, J., Kingma, D.P., Kumar, A., Ermon, S., Poole, B.: Score-based generative modeling through stochastic differential equations. arXiv preprint arXiv:2011.13456 (2020)"},{"key":"28_CR26","unstructured":"Sushko, V., Sch\u00f6nfeld, E., Zhang, D., Gall, J., Schiele, B., Khoreva, A.: You only need adversarial supervision for semantic image synthesis. arXiv preprint arXiv:2012.04781 (2020)"},{"key":"28_CR27","doi-asserted-by":"crossref","unstructured":"Tan, Z., Chai, M., Chen, D., Liao, J., Chu, Q., Liu, B., Hua, G., Yu, N.: Diverse semantic image synthesis via probability distribution modeling. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7962\u20137971 (2021)","DOI":"10.1109\/CVPR46437.2021.00787"},{"issue":"9","key":"28_CR28","first-page":"4852","volume":"44","author":"Z Tan","year":"2021","unstructured":"Tan, Z., Chen, D., Chu, Q., Chai, M., Liao, J., He, M., Yuan, L., Hua, G., Yu, N.: Efficient semantic image synthesis via class-adaptive normalization. IEEE Trans. Pattern Anal. Mach. Intell. 44(9), 4852\u20134866 (2021)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"28_CR29","doi-asserted-by":"crossref","unstructured":"Tang, H., Bai, S., Sebe, N.: Dual attention gans for semantic image synthesis. In: Proceedings of the 28th ACM International Conference on Multimedia, pp. 1994\u20132002 (2020)","DOI":"10.1145\/3394171.3416270"},{"key":"28_CR30","unstructured":"Teng, J., Zheng, W., Ding, M., Hong, W., Wangni, J., Yang, Z., Tang, J.: Relay diffusion: Unifying diffusion process across resolutions for image synthesis. arXiv preprint arXiv:2309.03350 (2023)"},{"key":"28_CR31","unstructured":"Van Den\u00a0Oord, A., Vinyals, O., et\u00a0al.: Neural discrete representation learning. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"28_CR32","unstructured":"Wang, T., Zhang, T., Zhang, B., Ouyang, H., Chen, D., Chen, Q., Wen, F.: Pretraining is all you need for image-to-image translation. arXiv preprint arXiv:2205.12952 (2022)"},{"key":"28_CR33","doi-asserted-by":"crossref","unstructured":"Wang, T.C., Liu, M.Y., Zhu, J.Y., Tao, A., Kautz, J., Catanzaro, B.: High-resolution image synthesis and semantic manipulation with conditional gans. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 8798\u20138807 (2018)","DOI":"10.1109\/CVPR.2018.00917"},{"key":"28_CR34","unstructured":"Wang, W., Bao, J., Zhou, W., Chen, D., Chen, D., Yuan, L., Li, H.: Semantic image synthesis via diffusion models. arXiv preprint arXiv:2207.00050 (2022)"},{"key":"28_CR35","doi-asserted-by":"crossref","unstructured":"Wu, Y., He, K.: Group normalization. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 3\u201319 (2018)","DOI":"10.1007\/978-3-030-01261-8_1"},{"key":"28_CR36","unstructured":"Zhan, F., Yu, Y., Wu, R., Zhang, J., Lu, S., Liu, L., Kortylewski, A., Theobalt, C., Xing, E.: Multimodal image synthesis and editing: A survey. arXiv preprint arXiv:2112.13592 (2022)"},{"key":"28_CR37","doi-asserted-by":"crossref","unstructured":"Zhang, L., Rao, A., Agrawala, M.: Adding conditional control to text-to-image diffusion models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3836\u20133847 (2023)","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"28_CR38","doi-asserted-by":"crossref","unstructured":"Zhou, B., Zhao, H., Puig, X., Fidler, S., Barriuso, A., Torralba, A.: Scene parsing through ade20k dataset. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2017)","DOI":"10.1109\/CVPR.2017.544"},{"key":"28_CR39","doi-asserted-by":"crossref","unstructured":"Zhou, B., Zhao, H., Puig, X., Fidler, S., Barriuso, A., Torralba, A.: Scene parsing through ade20k dataset. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 633\u2013641 (2017)","DOI":"10.1109\/CVPR.2017.544"},{"key":"28_CR40","doi-asserted-by":"crossref","unstructured":"Zhu, P., Abdal, R., Qin, Y., Wonka, P.: Sean: Image synthesis with semantic region-adaptive normalization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5104\u20135113 (2020)","DOI":"10.1109\/CVPR42600.2020.00515"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-8505-6_28","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,6]],"date-time":"2024-11-06T22:06:56Z","timestamp":1730930816000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-8505-6_28"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,7]]},"ISBN":["9789819785049","9789819785056"],"references-count":40,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-8505-6_28","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,7]]},"assertion":[{"value":"7 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Urumqi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2024.prcv.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}