{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T17:36:13Z","timestamp":1777570573534,"version":"3.51.4"},"publisher-location":"Cham","reference-count":53,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031733826","type":"print"},{"value":"9783031733833","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,3]],"date-time":"2024-11-03T00:00:00Z","timestamp":1730592000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,3]],"date-time":"2024-11-03T00:00:00Z","timestamp":1730592000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73383-3_9","type":"book-chapter","created":{"date-parts":[[2024,11,2]],"date-time":"2024-11-02T12:03:06Z","timestamp":1730548986000},"page":"146-164","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["PanoFree: Tuning-Free Holistic Multi-view Image Generation with\u00a0Cross-View Self-guidance"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-2990-9671","authenticated-orcid":false,"given":"Aoming","family":"Liu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7416-1216","authenticated-orcid":false,"given":"Zhong","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8582-1024","authenticated-orcid":false,"given":"Zhang","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1545-019X","authenticated-orcid":false,"given":"Nannan","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2126-6054","authenticated-orcid":false,"given":"Yi","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7074-3219","authenticated-orcid":false,"given":"Bryan A.","family":"Plummer","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,11,3]]},"reference":[{"key":"9_CR1","unstructured":"Bar-Tal, O., Yariv, L., Lipman, Y., Dekel, T.: MultiDiffusion: fusing diffusion paths for controlled image generation (2023)"},{"key":"9_CR2","unstructured":"Binkowski, M., Sutherland, D.J., Arbel, M., Gretton, A.: Demystifying MMD GANs. arXiv: abs\/1801.01401 (2018). https:\/\/api.semanticscholar.org\/CorpusID:3531856"},{"key":"9_CR3","unstructured":"Brock, A., Donahue, J., Simonyan, K.: Large scale GAN training for high fidelity natural image synthesis. arXiv preprint arXiv:1809.11096 (2018)"},{"key":"9_CR4","doi-asserted-by":"crossref","unstructured":"Chen, D.Z., Siddiqui, Y., Lee, H.Y., Tulyakov, S., Nie\u00dfner, M.: Text2Tex: text-driven texture synthesis via diffusion models. arXiv preprint arXiv:2303.11396 (2023)","DOI":"10.1109\/ICCV51070.2023.01701"},{"key":"9_CR5","doi-asserted-by":"crossref","unstructured":"Chen, Z., Wang, G., Liu, Z.: Text2Light: zero-shot text-driven HDR panorama generation. ACM Trans. Graph. (TOG) 41(6), 1\u201316 (2022)","DOI":"10.1145\/3550454.3555447"},{"key":"9_CR6","doi-asserted-by":"crossref","unstructured":"Cheng, Y.C., Lin, C.H., Lee, H.Y., Ren, J., Tulyakov, S., Yang, M.H.: InOut: diverse image outpainting via GAN inversion. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 11421\u201311430 (2021). https:\/\/api.semanticscholar.org\/CorpusID:232478397","DOI":"10.1109\/CVPR52688.2022.01114"},{"key":"9_CR7","unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat GANs on image synthesis. Adv. Neural Inf. Process. Syst. 34, 8780\u20138794 (2021)"},{"key":"9_CR8","doi-asserted-by":"crossref","unstructured":"Esser, P., Rombach, R., Ommer, B.: Taming transformers for high-resolution image synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12873\u201312883 (2021)","DOI":"10.1109\/CVPR46437.2021.01268"},{"key":"9_CR9","unstructured":"Fang, C., Hu, X., Luo, K., Tan, P.: Ctrl-Room: controllable text-to-3D room meshes generation with layout constraints. arXiv preprint arXiv:2310.03602 (2023)"},{"key":"9_CR10","unstructured":"Feng, M., Liu, J., Cui, M., Xie, X.: Diffusion360: seamless 360 degree panoramic image generation based on diffusion models. arXiv: abs\/2311.13141 (2023). https:\/\/api.semanticscholar.org\/CorpusID:265351889"},{"key":"9_CR11","unstructured":"Fridman, R., Abecasis, A., Kasten, Y., Dekel, T.: SceneScape: text-driven consistent scene generation. arXiv preprint arXiv:2302.01133 (2023)"},{"key":"9_CR12","doi-asserted-by":"crossref","unstructured":"Goodfellow, I., et al.: Generative adversarial networks. Commun. ACM 63(11), 139\u2013144 (2020)","DOI":"10.1145\/3422622"},{"key":"9_CR13","doi-asserted-by":"crossref","unstructured":"Hessel, J., Holtzman, A., Forbes, M., Bras, R.L., Choi, Y.: ClipScore: a reference-free evaluation metric for image captioning. arXiv: abs\/2104.08718 (2021). https:\/\/api.semanticscholar.org\/CorpusID:233296711","DOI":"10.18653\/v1\/2021.emnlp-main.595"},{"key":"9_CR14","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., Hochreiter, S.: GANs trained by a two time-scale update rule converge to a local Nash equilibrium. In: Neural Information Processing Systems (2017). https:\/\/api.semanticscholar.org\/CorpusID:326772"},{"key":"9_CR15","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. In: Advances in Neural Information Processing Systems (NeurIPS) (2020)"},{"key":"9_CR16","doi-asserted-by":"crossref","unstructured":"Ho, J., Saharia, C., Chan, W., Fleet, D.J., Norouzi, M., Salimans, T.: Cascaded diffusion models for high fidelity image generation. J. Mach. Learn. Res. 23(47), 1\u201333 (2022)","DOI":"10.1145\/3528233.3530757"},{"key":"9_CR17","unstructured":"Ho, J., Salimans, T.: Classifier-free diffusion guidance. arXiv preprint arXiv:2207.12598 (2022)"},{"key":"9_CR18","doi-asserted-by":"crossref","unstructured":"H\u00f6llein, L., Cao, A., Owens, A., Johnson, J., Nie\u00dfner, M.: Text2Room: extracting textured 3D meshes from 2D text-to-image models. arXiv preprint arXiv:2303.11989 (2023)","DOI":"10.1109\/ICCV51070.2023.00727"},{"key":"9_CR19","unstructured":"Karras, T., Aittala, M., Aila, T., Laine, S.: Elucidating the design space of diffusion-based generative models. arXiv preprint arXiv:2206.00364 (2022)"},{"key":"9_CR20","unstructured":"Karras, T., et al.: Alias-free generative adversarial networks. Adv. Neural Inf. Process. Syst. 34, 852\u2013863 (2021)"},{"key":"9_CR21","unstructured":"Lee, Y., Kim, K., Kim, H., Sung, M.: SyncDiffusion: coherent montage via synchronized joint diffusions. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"9_CR22","unstructured":"Li, J., Bansal, M.: PanoGen: text-conditioned panoramic environment generation for vision-and-language navigation. arXiv: abs\/2305.19195 (2023). https:\/\/api.semanticscholar.org\/CorpusID:258967291"},{"key":"9_CR23","doi-asserted-by":"crossref","unstructured":"Lin, C.H., Chang, C.C., Chen, Y.S., Juan, D.C., Wei, W., Chen, H.T.: COCO-GAN: generation by parts via conditional coordinating. In: 2019 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 4511\u20134520 (2019). https:\/\/api.semanticscholar.org\/CorpusID:90262507","DOI":"10.1109\/ICCV.2019.00461"},{"key":"9_CR24","unstructured":"Lin, C.H., Lee, H.Y., Cheng, Y.C., Tulyakov, S., Yang, M.H.: InfinityGAN: towards infinite-pixel image synthesis. In: International Conference on Learning Representations (2021). https:\/\/api.semanticscholar.org\/CorpusID:238419701"},{"key":"9_CR25","unstructured":"Liu, X., Zhang, X., Ma, J., Peng, J., et\u00a0al.: InstaFlow: one step is enough for high-quality diffusion-based text-to-image generation. In: The International Conference on Learning Representations (2023)"},{"key":"9_CR26","unstructured":"Lu, C., Zhou, Y., Bao, F., Chen, J., Li, C., Zhu, J.: DPM-Solver: a fast ode solver for diffusion probabilistic model sampling in around 10 steps. arXiv preprint arXiv:2206.00927 (2022)"},{"key":"9_CR27","doi-asserted-by":"crossref","unstructured":"Lugmayr, A., Danelljan, M., Romero, A., Yu, F., Timofte, R., Gool, L.V.: Repaint: inpainting using denoising diffusion probabilistic models. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 11451\u201311461 (2022). https:\/\/api.semanticscholar.org\/CorpusID:246240274","DOI":"10.1109\/CVPR52688.2022.01117"},{"key":"9_CR28","unstructured":"Meng, C., et al.: SDEdit: guided image synthesis and editing with stochastic differential equations. In: International Conference on Learning Representations (2021). https:\/\/api.semanticscholar.org\/CorpusID:245704504"},{"key":"9_CR29","unstructured":"Mou, C., et al.: T2I-adapter: learning adapters to dig out more controllable ability for text-to-image diffusion models. arXiv: abs\/2302.08453 (2023). https:\/\/api.semanticscholar.org\/CorpusID:256900833"},{"key":"9_CR30","unstructured":"Nichol, A., et al.: GLIDE: towards photorealistic image generation and editing with text-guided diffusion models. arXiv preprint arXiv:2112.10741 (2021)"},{"key":"9_CR31","unstructured":"Oh, C.H., Cho, W., Park, D., Chae, Y., Wang, L., Yoon, K.J.: BIPS: bi-modal indoor panorama synthesis via residual depth-aided adversarial learning. arXiv: abs\/2112.06179 (2021). https:\/\/api.semanticscholar.org\/CorpusID:245123664"},{"key":"9_CR32","doi-asserted-by":"crossref","unstructured":"Peebles, W., Xie, S.: Scalable diffusion models with transformers. In: International Conference on Computer Vision, pp. 4195\u20134205 (2023)","DOI":"10.1109\/ICCV51070.2023.00387"},{"key":"9_CR33","unstructured":"von Platen, P., et al.: Diffusers: state-of-the-art diffusion models (2022). https:\/\/github.com\/huggingface\/diffusers"},{"key":"9_CR34","unstructured":"Radford, A., Narasimhan, K., Salimans, T., Sutskever, I.: Language models are unsupervised multitask learners (2019)"},{"key":"9_CR35","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., Chen, M.: Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125 (2022)"},{"key":"9_CR36","unstructured":"Razavi, A., Van\u00a0den Oord, A., Vinyals, O.: Generating diverse high-fidelity images with VQ-VAE-2. In: Advances in Neural Information Processing Systems, vol. 32 (2019)"},{"key":"9_CR37","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"9_CR38","unstructured":"Saharia, C., et\u00a0al.: Photorealistic text-to-image diffusion models with deep language understanding. In: Advances in Neural Information Processing Systems (NeurIPS) (2022)"},{"key":"9_CR39","unstructured":"Sohl-Dickstein, J., Weiss, E., Maheswaranathan, N., Ganguli, S.: Deep unsupervised learning using nonequilibrium thermodynamics. In: International Conference on Machine Learning. PMLR (2015)"},{"key":"9_CR40","unstructured":"Song, J., Meng, C., Ermon, S.: Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502 (2020)"},{"key":"9_CR41","unstructured":"Song, Y., Ermon, S.: Generative modeling by estimating gradients of the data distribution. In: Advances in Neural Information Processing Systems (NeurIPS), vol. 32 (2019)"},{"key":"9_CR42","first-page":"12438","volume":"33","author":"Y Song","year":"2020","unstructured":"Song, Y., Ermon, S.: Improved techniques for training score-based generative models. Adv. Neural. Inf. Process. Syst. 33, 12438\u201312448 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"9_CR43","unstructured":"Song, Y., Sohl-Dickstein, J.N., Kingma, D.P., Kumar, A., Ermon, S., Poole, B.: Score-based generative modeling through stochastic differential equations. In: ICLR (2021)"},{"key":"9_CR44","unstructured":"Tang, S., Zhang, F., Chen, J., Wang, P., Furukawa, Y.: MVDiffusion: enabling holistic multi-view image generation with correspondence-aware diffusion. arXiv: abs\/2307.01097 (2023). https:\/\/api.semanticscholar.org\/CorpusID:259316427"},{"key":"9_CR45","doi-asserted-by":"crossref","unstructured":"Teterwak, P., et al.: Boundless: generative adversarial networks for image extension. In: 2019 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 10520\u201310529 (2019). https:\/\/api.semanticscholar.org\/CorpusID:201106503","DOI":"10.1109\/ICCV.2019.01062"},{"key":"9_CR46","unstructured":"Van Den\u00a0Oord, A., Vinyals, O., et\u00a0al.: Neural discrete representation learning. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"9_CR47","unstructured":"Voynov, A., Hertz, A., Arar, M., Fruchter, S., Cohen-Or, D.: AnyLens: a generative diffusion model with any rendering lens (2023)"},{"key":"9_CR48","doi-asserted-by":"publisher","unstructured":"Wang, G., Yang, Y., Loy, C.C., Liu, Z.: StyleLight: HDR panorama generation for lighting estimation and editing. In: Avidan, S., Brostow, G., Cisse, M., Farinella, G.M., Hassner, T. (eds.) Computer Vision \u2013 ECCV 2022. ECCV 2022. LNCS, vol. 13675, pp. 477\u2013492. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19784-0_28, https:\/\/api.semanticscholar.org\/CorpusID:251196614","DOI":"10.1007\/978-3-031-19784-0_28"},{"key":"9_CR49","doi-asserted-by":"crossref","unstructured":"Wang, H., Xiang, X., Fan, Y., Xue, J.H.: Customizing 360-degree panoramas through text-to-image diffusion models. arXiv: abs\/2310.18840 (2023). https:\/\/api.semanticscholar.org\/CorpusID:264590753","DOI":"10.1109\/WACV57701.2024.00486"},{"key":"9_CR50","unstructured":"Wu, T., Zheng, C., Cham, T.J.: PanoDiffusion: 360-degree panorama outpainting via diffusion (2023). https:\/\/api.semanticscholar.org\/CorpusID:259360663"},{"key":"9_CR51","doi-asserted-by":"crossref","unstructured":"Zhang, L., Rao, A., Agrawala, M.: Adding conditional control to text-to-image diffusion models (2023)","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"9_CR52","doi-asserted-by":"crossref","unstructured":"Zhang, Q., Song, J., Huang, X., Chen, Y., Liu, M.Y.: DiffCollage: parallel generation of large content with diffusion models. In: 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 10188\u201310198 (2023). https:\/\/api.semanticscholar.org\/CorpusID:257834007","DOI":"10.1109\/CVPR52729.2023.00982"},{"key":"9_CR53","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A.A., Shechtman, E., Wang, O.: The unreasonable effectiveness of deep features as a perceptual metric. In: 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 586\u2013595 (2018). https:\/\/api.semanticscholar.org\/CorpusID:4766599","DOI":"10.1109\/CVPR.2018.00068"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73383-3_9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,2]],"date-time":"2024-11-02T12:08:15Z","timestamp":1730549295000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73383-3_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,3]]},"ISBN":["9783031733826","9783031733833"],"references-count":53,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73383-3_9","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,3]]},"assertion":[{"value":"3 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}