{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,25]],"date-time":"2025-05-25T19:10:02Z","timestamp":1748200202820,"version":"3.41.0"},"publisher-location":"Cham","reference-count":76,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031918377","type":"print"},{"value":"9783031918384","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-91838-4_9","type":"book-chapter","created":{"date-parts":[[2025,5,25]],"date-time":"2025-05-25T18:34:05Z","timestamp":1748198045000},"page":"142-160","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["PRISM: Progressive Restoration for\u00a0Scene Graph-Based Image Manipulation"],"prefix":"10.1007","author":[{"given":"Pavel","family":"Jahoda","sequence":"first","affiliation":[]},{"given":"Yousef","family":"Yeganeh","sequence":"additional","affiliation":[]},{"given":"Ehsan","family":"Adeli","sequence":"additional","affiliation":[]},{"given":"Nassir","family":"Navab","sequence":"additional","affiliation":[]},{"given":"Azade","family":"Farshad","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,5,12]]},"reference":[{"key":"9_CR1","unstructured":"Arjovsky, M., Chintala, S., Bottou, L.: Wasserstein generative adversarial networks. In: International Conference on Machine Learning, pp. 214\u2013223. PMLR (2017)"},{"key":"9_CR2","unstructured":"Bau, D., et al.: Semantic photo manipulation with a generative image prior. arXiv preprint arXiv:2005.07727 (2020)"},{"key":"9_CR3","unstructured":"Brock, A., Donahue, J., Simonyan, K.: Large scale GAN training for high fidelity natural image synthesis. arXiv preprint arXiv:1809.11096 (2018)"},{"key":"9_CR4","doi-asserted-by":"crossref","unstructured":"Brooks, T., Holynski, A., Efros, A.A.: InstructPix2Pix: learning to follow image editing instructions. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18392\u201318402 (2023)","DOI":"10.1109\/CVPR52729.2023.01764"},{"key":"9_CR5","unstructured":"Chen, X., Duan, Y., Houthooft, R., Schulman, J., Sutskever, I., Abbeel, P.: InfoGAN: Interpretable representation learning by information maximizing generative adversarial nets. Adv. Neural Inf. Process. Syst. 29 (2016)"},{"key":"9_CR6","doi-asserted-by":"crossref","unstructured":"Cong, Y., Liao, W., Ackermann, H., Rosenhahn, B., Yang, M.Y.: Spatial-temporal transformer for dynamic scene graph generation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 16372\u201316382 (2021)","DOI":"10.1109\/ICCV48922.2021.01606"},{"key":"9_CR7","doi-asserted-by":"crossref","unstructured":"Dhamo, H., et al.: Semantic image manipulation using scene graphs. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2020)","DOI":"10.1109\/CVPR42600.2020.00526"},{"key":"9_CR8","doi-asserted-by":"publisher","first-page":"333","DOI":"10.1016\/j.patrec.2019.05.007","volume":"125","author":"H Dhamo","year":"2019","unstructured":"Dhamo, H., Tateno, K., Laina, I., Navab, N., Tombari, F.: Peeking behind objects: layered depth prediction from a single image. Pattern Recogn. Lett. 125, 333\u2013340 (2019)","journal-title":"Pattern Recogn. Lett."},{"key":"9_CR9","doi-asserted-by":"crossref","unstructured":"Fan, W.C., Yang, C.F., Yang, C.A., Wang, Y.C.F.: Target-free text-guided image manipulation. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a037, pp. 588\u2013596 (2023)","DOI":"10.1609\/aaai.v37i1.25134"},{"key":"9_CR10","unstructured":"Farshad, A., Musatian, S., Dhamo, H., Navab, N.: MIGS: meta image generation from scene graphs. In: BMVC (2021)"},{"key":"9_CR11","doi-asserted-by":"crossref","unstructured":"Farshad, A., Yeganeh, Y., Chi, Y., Shen, C., Ommer, B., Navab, N.: SceneGenie: scene graph guided diffusion models for image synthesis. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 88\u201398 (2023)","DOI":"10.1109\/ICCVW60793.2023.00016"},{"key":"9_CR12","unstructured":"Farshad, A., Yeganeh, Y., Dhamo, H., Tombari, F., Navab, N.: DisPositioNet: disentangled pose and identity in semantic image manipulation. In: BMVC (2022)"},{"key":"9_CR13","doi-asserted-by":"crossref","unstructured":"Gao, H., Pei, J., Huang, H.: ProGAN: network embedding via proximity generative adversarial network. In: Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, pp. 1308\u20131316 (2019)","DOI":"10.1145\/3292500.3330866"},{"key":"9_CR14","doi-asserted-by":"crossref","unstructured":"Garg, S., Dhamo, H., Farshad, A., Musatian, S., Navab, N., Tombari, F.: Unconditional scene graph generation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 16362\u201316371 (2021)","DOI":"10.1109\/ICCV48922.2021.01605"},{"issue":"11","key":"9_CR15","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1145\/3422622","volume":"63","author":"I Goodfellow","year":"2020","unstructured":"Goodfellow, I., et al.: Generative adversarial networks. Commun. ACM 63(11), 139\u2013144 (2020)","journal-title":"Commun. ACM"},{"key":"9_CR16","doi-asserted-by":"crossref","unstructured":"Guo, Z., Chen, Z., Yu, T., Chen, J., Liu, S.: Progressive image inpainting with full-resolution residual network. In: Proceedings of the 27th ACM International Conference on Multimedia, pp. 2496\u20132504 (2019)","DOI":"10.1145\/3343031.3351022"},{"key":"9_CR17","doi-asserted-by":"crossref","unstructured":"He, S., et al.: Context-aware layout to image generation with enhanced object appearance. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15049\u201315058 (2021)","DOI":"10.1109\/CVPR46437.2021.01480"},{"key":"9_CR18","doi-asserted-by":"crossref","unstructured":"Herzig, R., Bar, A., Xu, H., Chechik, G., Darrell, T., Globerson, A.: Learning canonical representations for scene graph to image generation. In: European Conference on Computer Vision, pp. 210\u2013227. Springer (2020)","DOI":"10.1007\/978-3-030-58574-7_13"},{"key":"9_CR19","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., Hochreiter, S.: GANs trained by a two time-scale update rule converge to a local nash equilibrium. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"9_CR20","unstructured":"Hong, S., Yan, X., Huang, T.S., Lee, H.: Learning hierarchical semantic image manipulation through structured representations. In: Bengio, S., Wallach, H., Larochelle, H., Grauman, K., Cesa-Bianchi, N., Garnett, R. (eds.) Advances in Neural Information Processing Systems, vol.\u00a031. Curran Associates, Inc. (2018)"},{"key":"9_CR21","doi-asserted-by":"crossref","unstructured":"Hong, S., Yang, D., Choi, J., Lee, H.: Inferring semantic layout for hierarchical text-to-image synthesis. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7986\u20137994 (2018)","DOI":"10.1109\/CVPR.2018.00833"},{"key":"9_CR22","unstructured":"Ioffe, S., Szegedy, C.: Batch normalization: accelerating deep network training by reducing internal covariate shift. In: International Conference on Machine Learning, pp. 448\u2013456. PMLR (2015)"},{"key":"9_CR23","doi-asserted-by":"crossref","unstructured":"Johnson, J., Alahi, A., Fei-Fei, L.: Perceptual losses for real-time style transfer and super-resolution. In: European Conference on Computer Vision, pp. 694\u2013711. Springer (2016)","DOI":"10.1007\/978-3-319-46475-6_43"},{"key":"9_CR24","doi-asserted-by":"crossref","unstructured":"Johnson, J., Gupta, A., Fei-Fei, L.: Image generation from scene graphs. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1219\u20131228 (2018)","DOI":"10.1109\/CVPR.2018.00133"},{"key":"9_CR25","doi-asserted-by":"crossref","unstructured":"Johnson, J., Hariharan, B., Van Der\u00a0Maaten, L., Fei-Fei, L., Lawrence\u00a0Zitnick, C., Girshick, R.: CLEVR: a diagnostic dataset for compositional language and elementary visual reasoning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2901\u20132910 (2017)","DOI":"10.1109\/CVPR.2017.215"},{"key":"9_CR26","doi-asserted-by":"crossref","unstructured":"Johnson, J., et al.: Image retrieval using scene graphs. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3668\u20133678 (2015)","DOI":"10.1109\/CVPR.2015.7298990"},{"key":"9_CR27","doi-asserted-by":"crossref","unstructured":"Karras, T., Laine, S., Aila, T.: A style-based generator architecture for generative adversarial networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4401\u20134410 (2019)","DOI":"10.1109\/CVPR.2019.00453"},{"key":"9_CR28","doi-asserted-by":"crossref","unstructured":"Kim, G., Kwon, T., Ye, J.C.: DiffusionClip: text-guided diffusion models for robust image manipulation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2426\u20132435 (2022)","DOI":"10.1109\/CVPR52688.2022.00246"},{"key":"9_CR29","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes. In: Bengio, Y., LeCun, Y. (eds.) 2nd International Conference on Learning Representations, ICLR 2014, Banff, AB, Canada, 14\u201316 April 2014, Conference Track Proceedings (2014). http:\/\/arxiv.org\/abs\/1312.6114"},{"issue":"1","key":"9_CR30","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1007\/s11263-016-0981-7","volume":"123","author":"R Krishna","year":"2017","unstructured":"Krishna, R., et al.: Visual genome: connecting language and vision using crowdsourced dense image annotations. Int. J. Comput. Vision 123(1), 32\u201373 (2017)","journal-title":"Int. J. Comput. Vision"},{"key":"9_CR31","doi-asserted-by":"crossref","unstructured":"Li, B., Lin, X., Liu, B., He, Z.F., Lai, Y.K.: Lightweight text-driven image editing with disentangled content and attributes. IEEE Trans. Multimedia (2023)","DOI":"10.1109\/TMM.2023.3289755"},{"key":"9_CR32","doi-asserted-by":"crossref","unstructured":"Li, B., Qi, X., Lukasiewicz, T., Torr, P.H.: ManiGAN: text-guided image manipulation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7880\u20137889 (2020)","DOI":"10.1109\/CVPR42600.2020.00790"},{"key":"9_CR33","doi-asserted-by":"crossref","unstructured":"Li, J., Wang, N., Zhang, L., Du, B., Tao, D.: Recurrent feature reasoning for image inpainting. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7760\u20137768 (2020)","DOI":"10.1109\/CVPR42600.2020.00778"},{"key":"9_CR34","unstructured":"Li, Y., Ma, T., Bai, Y., Duan, N., Wei, S., Wang, X.: PasteGAN: a semi-parametric method to generate image from scene graph. Adv. Neural Inf. Process. Syst. 32 (2019)"},{"key":"9_CR35","doi-asserted-by":"crossref","unstructured":"Li, Y., Ouyang, W., Zhou, B., Shi, J., Zhang, C., Wang, X.: Factorizable net: an efficient subgraph-based framework for scene graph generation. In: Proceedings of the European Conference on Computer Vision (ECCV) (2018)","DOI":"10.1007\/978-3-030-01246-5_21"},{"key":"9_CR36","doi-asserted-by":"crossref","unstructured":"Li, Z., Wu, J., Koh, I., Tang, Y., Sun, L.: Image synthesis from layout with locality-aware mask adaption. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 13819\u201313828 (2021)","DOI":"10.1109\/ICCV48922.2021.01356"},{"key":"9_CR37","unstructured":"Liu, M.Y., Tuzel, O.: Coupled generative adversarial networks. Adv. Neural Inf. Process. Syst. 29 (2016)"},{"key":"9_CR38","unstructured":"Meng, C., et al.: SDEdit: guided image synthesis and editing with stochastic differential equations. In: International Conference on Learning Representations (2022). https:\/\/openreview.net\/forum?id=aBsCjcPu_tE"},{"key":"9_CR39","unstructured":"Mikolov, T., Chen, K., Corrado, G., Dean, J.: Efficient estimation of word representations in vector space. In: Proceedings of Workshop at ICLR 2013 (2013)"},{"key":"9_CR40","unstructured":"Mirza, M., Osindero, S.: Conditional generative adversarial nets (2014)"},{"key":"9_CR41","unstructured":"Mittal, G., Agrawal, S., Agarwal, A., Mehta, S., Marwah, T.: Interactive image generation using scene graphs. CoRR abs\/1905.03743 (2019). http:\/\/arxiv.org\/abs\/1905.03743"},{"key":"9_CR42","unstructured":"Nichol, A., et al.: GLIDE: towards photorealistic image generation and editing with text-guided diffusion models. In: International Conference on Machine Learning (2022)"},{"key":"9_CR43","doi-asserted-by":"publisher","unstructured":"Ntavelis, E., Romero, A., Kastanis, I., Gool, L.V., Timofte, R.: SESAME: semantic editing of scenes by adding, manipulating or erasing objects. In: Computer Vision \u2013 ECCV 2020, pp. 394\u2013411. Springer (2020). https:\/\/doi.org\/10.1007\/978-3-030-58542-6_24","DOI":"10.1007\/978-3-030-58542-6_24"},{"key":"9_CR44","unstructured":"Odena, A., Olah, C., Shlens, J.: Conditional image synthesis with auxiliary classifier GANs. In: International Conference on Machine Learning, pp. 2642\u20132651. PMLR (2017)"},{"key":"9_CR45","doi-asserted-by":"crossref","unstructured":"Park, T., Liu, M.Y., Wang, T.C., Zhu, J.Y.: Semantic image synthesis with spatially-adaptive normalization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2019)","DOI":"10.1109\/CVPR.2019.00244"},{"key":"9_CR46","doi-asserted-by":"crossref","unstructured":"Park, T., Liu, M.Y., Wang, T.C., Zhu, J.Y.: Semantic image synthesis with spatially-adaptive normalization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2337\u20132346 (2019)","DOI":"10.1109\/CVPR.2019.00244"},{"key":"9_CR47","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"9_CR48","unstructured":"Radford, A., Metz, L., Chintala, S.: Unsupervised representation learning with deep convolutional generative adversarial networks (2015)"},{"key":"9_CR49","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., Chen, M.: Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125 (2022)"},{"key":"9_CR50","unstructured":"Ramesh, A., et al.: Zero-shot text-to-image generation. In: International Conference on Machine Learning, pp. 8821\u20138831. PMLR (2021)"},{"key":"9_CR51","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. Adv. Neural Inf. Process. Syst. 28 (2015)"},{"key":"9_CR52","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"issue":"1\u20134","key":"9_CR53","doi-asserted-by":"publisher","first-page":"259","DOI":"10.1016\/0167-2789(92)90242-F","volume":"60","author":"LI Rudin","year":"1992","unstructured":"Rudin, L.I., Osher, S., Fatemi, E.: Nonlinear total variation based noise removal algorithms. Physica D 60(1\u20134), 259\u2013268 (1992)","journal-title":"Physica D"},{"key":"9_CR54","unstructured":"Salimans, T., Goodfellow, I., Zaremba, W., Cheung, V., Radford, A., Chen, X.: Improved techniques for training GANs. Adv. Neural Inf. Process. Syst. 29 (2016)"},{"key":"9_CR55","doi-asserted-by":"crossref","unstructured":"Shade, J., Gortler, S., He, L.W., Szeliski, R.: Layered depth images. In: Proceedings of the 25th Annual Conference on Computer Graphics and Interactive Techniques, pp. 231\u2013242 (1998)","DOI":"10.1145\/280814.280882"},{"key":"9_CR56","unstructured":"Shu, R., Bui, H., Ermon, S.: AC-GAN learns a biased distribution. In: NIPS Workshop on Bayesian Deep Learning, vol.\u00a08, p.\u00a034 (2017)"},{"key":"9_CR57","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv:1409.1556 (2014)"},{"key":"9_CR58","unstructured":"Sohl-Dickstein, J., Weiss, E., Maheswaranathan, N., Ganguli, S.: Deep unsupervised learning using nonequilibrium thermodynamics. In: Bach, F., Blei, D. (eds.) Proceedings of the 32nd International Conference on Machine Learning. Proceedings of Machine Learning Research, PMLR, Lille, France, vol.\u00a037, pp. 2256\u20132265 (2015)"},{"key":"9_CR59","doi-asserted-by":"crossref","unstructured":"Sylvain, T., Zhang, P., Bengio, Y., Hjelm, R.D., Sharma, S.: Object-centric image generation from layouts. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a035, pp. 2647\u20132655 (2021)","DOI":"10.1609\/aaai.v35i3.16368"},{"key":"9_CR60","unstructured":"Tripathi, S., Bhiwandiwalla, A., Bastidas, A., Tang, H.: Using scene graph context to improve image generation. arXiv preprint arXiv:1901.03762 (2019)"},{"key":"9_CR61","doi-asserted-by":"crossref","unstructured":"Tseng, H.Y., Lee, H.Y., Jiang, L., Yang, M.H., Yang, W.: RetrieveGAN: Image Synthesis via Differentiable Patch Retrieval, pp. 242\u2013257 (2020)","DOI":"10.1007\/978-3-030-58598-3_15"},{"key":"9_CR62","doi-asserted-by":"crossref","unstructured":"Wang, L., Yang, S., Liu, S., Chen, Y.C.: Not all steps are created equal: selective diffusion distillation for image manipulation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7472\u20137481 (2023)","DOI":"10.1109\/ICCV51070.2023.00687"},{"key":"9_CR63","doi-asserted-by":"crossref","unstructured":"Wang, T.C., Liu, M.Y., Zhu, J.Y., Tao, A., Kautz, J., Catanzaro, B.: High-resolution image synthesis and semantic manipulation with conditional GANs. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 8798\u20138807 (2018)","DOI":"10.1109\/CVPR.2018.00917"},{"key":"9_CR64","doi-asserted-by":"publisher","unstructured":"Wang, X., Gupta, A.: Generative image modeling using style and structure adversarial networks. In: Computer Vision \u2013 ECCV 2016, pp. 318\u2013335. Springer (2016). https:\/\/doi.org\/10.1007\/978-3-319-46493-0_20","DOI":"10.1007\/978-3-319-46493-0_20"},{"issue":"4","key":"9_CR65","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang, Z., Bovik, A.C., Sheikh, H.R., Simoncelli, E.P.: Image quality assessment: from error visibility to structural similarity. IEEE Trans. Image Process. 13(4), 600\u2013612 (2004)","journal-title":"IEEE Trans. Image Process."},{"key":"9_CR66","doi-asserted-by":"crossref","unstructured":"Xu, D., Zhu, Y., Choy, C.B., Fei-Fei, L.: Scene graph generation by iterative message passing. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2017)","DOI":"10.1109\/CVPR.2017.330"},{"key":"9_CR67","unstructured":"Zhang, H., Goodfellow, I., Metaxas, D., Odena, A.: Self-attention generative adversarial networks. In: International Conference on Machine Learning, pp. 7354\u20137363. PMLR (2019)"},{"key":"9_CR68","doi-asserted-by":"crossref","unstructured":"Zhang, H., et al.: StackGAN: text to photo-realistic image synthesis with stacked generative adversarial networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 5907\u20135915 (2017)","DOI":"10.1109\/ICCV.2017.629"},{"key":"9_CR69","doi-asserted-by":"crossref","unstructured":"Zhang, H., Hu, Z., Luo, C., Zuo, W., Wang, M.: Semantic image inpainting with progressive generative networks. In: Proceedings of the 26th ACM International Conference on Multimedia, pp. 1939\u20131947 (2018)","DOI":"10.1145\/3240508.3240625"},{"key":"9_CR70","doi-asserted-by":"crossref","unstructured":"Zhang, L., Rao, A., Agrawala, M.: Adding conditional control to text-to-image diffusion models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3836\u20133847 (2023)","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"9_CR71","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A.A., Shechtman, E., Wang, O.: The unreasonable effectiveness of deep features as a perceptual metric. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2018)","DOI":"10.1109\/CVPR.2018.00068"},{"key":"9_CR72","doi-asserted-by":"crossref","unstructured":"Zhang, Y., et al.: Learning object consistency and interaction in image generation from scene graphs. In: IJCAI, pp. 1731\u20131739 (2023)","DOI":"10.24963\/ijcai.2023\/192"},{"key":"9_CR73","unstructured":"Zhang, Z., He, H., Plummer, B.A., Liao, Z., Wang, H.: Complex scene image editing by scene graph comprehension (2023)"},{"key":"9_CR74","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Zheng, J., Fang, Z., Plummer, B.A.: Text-to-image editing by image information removal. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 5232\u20135241 (2024)","DOI":"10.1109\/WACV57701.2024.00515"},{"key":"9_CR75","doi-asserted-by":"crossref","unstructured":"Zhu, J.Y., Park, T., Isola, P., Efros, A.A.: Unpaired image-to-image translation using cycle-consistent adversarial networks. In: 2017 IEEE International Conference on Computer Vision (ICCV) (2017)","DOI":"10.1109\/ICCV.2017.244"},{"key":"9_CR76","first-page":"3833","volume":"33","author":"B Zoph","year":"2020","unstructured":"Zoph, B., et al.: Rethinking pre-training and self-training. Adv. Neural. Inf. Process. Syst. 33, 3833\u20133845 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-91838-4_9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,25]],"date-time":"2025-05-25T18:34:30Z","timestamp":1748198070000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-91838-4_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031918377","9783031918384"],"references-count":76,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-91838-4_9","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"12 May 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}