{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T11:21:51Z","timestamp":1772623311335,"version":"3.50.1"},"publisher-location":"Cham","reference-count":42,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031729799","type":"print"},{"value":"9783031729805","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-72980-5_20","type":"book-chapter","created":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T09:15:43Z","timestamp":1730106943000},"page":"340-356","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Unveiling and\u00a0Mitigating Memorization in\u00a0Text-to-Image Diffusion Models Through Cross Attention"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2663-6405","authenticated-orcid":false,"given":"Jie","family":"Ren","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6227-7844","authenticated-orcid":false,"given":"Yaxin","family":"Li","sequence":"additional","affiliation":[]},{"given":"Shenglai","family":"Zeng","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4016-6748","authenticated-orcid":false,"given":"Han","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Lingjuan","family":"Lyu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7723-0048","authenticated-orcid":false,"given":"Yue","family":"Xing","sequence":"additional","affiliation":[]},{"given":"Jiliang","family":"Tang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,10,29]]},"reference":[{"key":"20_CR1","unstructured":"Deepfloyd if. https:\/\/github.com\/deep-floyd\/IF"},{"key":"20_CR2","unstructured":"stablediffusionapi\/epicrealism. https:\/\/huggingface.co\/stablediffusionapi\/epicrealism"},{"key":"20_CR3","unstructured":"Achiam, J., et al.: GPT-4 technical report. arXiv preprint arXiv:2303.08774 (2023)"},{"key":"20_CR4","unstructured":"Carlini, N., Ippolito, D., Jagielski, M., Lee, K., Tramer, F., Zhang, C.: Quantifying memorization across neural language models. arXiv preprint arXiv:2202.07646 (2022)"},{"key":"20_CR5","unstructured":"Carlini, N., Jagielski, M., Zhang, C., Papernot, N., Terzis, A., Tramer, F.: The privacy onion effect: memorization is relative. In: Advance Neural Information Processing Systems, vol. 35, pp. 13263\u201313276 (2022)"},{"key":"20_CR6","unstructured":"Carlini, N., et\u00a0al.: Extracting training data from large language models. In: 30th USENIX Security Symposium (USENIX Security 21), pp. 2633\u20132650 (2021)"},{"key":"20_CR7","unstructured":"Carlini, N., et al.: Extracting training data from diffusion models. In: 32nd USENIX Security Symposium (USENIX Security 23), pp. 5253\u20135270 (2023)"},{"issue":"4","key":"20_CR8","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3592116","volume":"42","author":"H Chefer","year":"2023","unstructured":"Chefer, H., Alaluf, Y., Vinker, Y., Wolf, L., Cohen-Or, D.: Attend-and-excite: attention-based semantic guidance for text-to-image diffusion models. ACM Trans. Graph. (TOG) 42(4), 1\u201310 (2023)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"20_CR9","unstructured":"Chen, J., et\u00a0al.: Pixart-$$\\alpha $$: fast training of diffusion transformer for photorealistic text-to-image synthesis. arXiv preprint arXiv:2310.00426 (2023)"},{"key":"20_CR10","unstructured":"Daras, G., Shah, K., Dagan, Y., Gollakota, A., Dimakis, A., Klivans, A.: Ambient diffusion: learning clean distributions from corrupted data. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"20_CR11","unstructured":"Hertz, A., Mokady, R., Tenenbaum, J., Aberman, K., Pritch, Y., Cohen-or, D.: prompt-to-prompt image editing with cross-attention control. In: The Eleventh International Conference on Learning Representations (2022)"},{"key":"20_CR12","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., Hochreiter, S.: Gans trained by a two time-scale update rule converge to a local Nash equilibrium. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"20_CR13","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. In: Advance Neural Information Processing Systems, vol. 33, pp. 6840\u20136851 (2020)"},{"key":"20_CR14","doi-asserted-by":"crossref","unstructured":"Jiang, H.H., et al.: AI art and its impact on artists. In: Proceedings of the 2023 AAAI\/ACM Conference on AI, Ethics, and Society, pp. 363\u2013374 (2023)","DOI":"10.1145\/3600211.3604681"},{"key":"20_CR15","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114 (2013)"},{"key":"20_CR16","doi-asserted-by":"crossref","unstructured":"Koh, J.Y., Baldridge, J., Lee, H., Yang, Y.: Text-to-image generation grounded by fine-grained user attention. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 237\u2013246 (2021)","DOI":"10.1109\/WACV48630.2021.00028"},{"key":"20_CR17","doi-asserted-by":"crossref","unstructured":"Lee, K.H., Chen, X., Hua, G., Hu, H., He, X.: Stacked cross attention for image-text matching. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 201\u2013216 (2018)","DOI":"10.1007\/978-3-030-01225-0_13"},{"key":"20_CR18","doi-asserted-by":"crossref","unstructured":"Liu, S., Zhang, Y., Li, W., Lin, Z., Jia, J.: Video-p2p: video editing with cross-attention control. arXiv preprint arXiv:2303.04761 (2023)","DOI":"10.1109\/CVPR52733.2024.00821"},{"key":"20_CR19","doi-asserted-by":"crossref","unstructured":"Ma, W.D.K., Lewis, J., Kleijn, W.B., Leung, T.: Directed diffusion: direct control of object placement through attention guidance. arXiv preprint arXiv:2302.13153 (2023)","DOI":"10.1609\/aaai.v38i5.28204"},{"key":"20_CR20","unstructured":"Mireshghallah, F., Uniyal, A., Wang, T., Evans, D., Berg-Kirkpatrick, T.: Memorization in nlp fine-tuning methods. arXiv preprint arXiv:2205.12506 (2022)"},{"key":"20_CR21","unstructured":"Naseh, A., Roh, J., Houmansadr, A.: Memory triggers: unveiling memorization in text-to-image generative models through word-level duplication. arXiv preprint arXiv:2312.03692 (2023)"},{"key":"20_CR22","unstructured":"Naseh, A., Roh, J., Houmansadr, A.: Understanding (un) intended memorization in text-to-image generative models. arXiv preprint arXiv:2312.07550 (2023)"},{"key":"20_CR23","doi-asserted-by":"crossref","unstructured":"Pizzi, E., Roy, S.D., Ravindra, S.N., Goyal, P., Douze, M.: a self-supervised descriptor for image copy detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14532\u201314542 (2022)","DOI":"10.1109\/CVPR52688.2022.01413"},{"key":"20_CR24","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"20_CR25","unstructured":"Ramesh, A., Dhariwal, P., Nichol, A., Chu, C., Chen, M.: Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125 1(2), 3 (2022)"},{"key":"20_CR26","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"20_CR27","doi-asserted-by":"crossref","unstructured":"Ruiz, N., Li, Y., Jampani, V., Pritch, Y., Rubinstein, M., Aberman, K.: Dreambooth: fine tuning text-to-image diffusion models for subject-driven generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22500\u201322510 (2023)","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"20_CR28","unstructured":"Saharia, C., et al.: Photorealistic text-to-image diffusion models with deep language understanding. In: Advance Neural Information Processing Systems, vol. 35, pp. 36479\u201336494 (2022)"},{"key":"20_CR29","unstructured":"Schuhmann, C., et al.: Laion-5b: an open large-scale dataset for training next generation image-text models. In: Advance Neural Information Processing Systems, vol. 35, pp. 25278\u201325294 (2022)"},{"key":"20_CR30","doi-asserted-by":"crossref","unstructured":"Somepalli, G., Singla, V., Goldblum, M., Geiping, J., Goldstein, T.: Diffusion art or digital forgery? Investigating data replication in diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6048\u20136058 (2023)","DOI":"10.1109\/CVPR52729.2023.00586"},{"key":"20_CR31","unstructured":"Somepalli, G., Singla, V., Goldblum, M., Geiping, J., Goldstein, T.: Understanding and mitigating copying in diffusion models. arXiv preprint arXiv:2305.20086 (2023)"},{"key":"20_CR32","unstructured":"Somepalli, G., Singla, V., Goldblum, M., Geiping, J., Goldstein, T.: Understanding and mitigating copying in diffusion models. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"20_CR33","doi-asserted-by":"crossref","unstructured":"Tang, R., et al.: What the DAAM: interpreting stable diffusion using cross attention. arXiv preprint arXiv:2210.04885 (2022)","DOI":"10.18653\/v1\/2023.acl-long.310"},{"key":"20_CR34","doi-asserted-by":"crossref","unstructured":"Vig, J., Belinkov, Y.: Analyzing the structure of attention in a transformer language model. arXiv preprint arXiv:1906.04284 (2019)","DOI":"10.18653\/v1\/W19-4808"},{"key":"20_CR35","doi-asserted-by":"crossref","unstructured":"Wallace, B., et al.: Diffusion model alignment using direct preference optimization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8228\u20138238 (2024)","DOI":"10.1109\/CVPR52733.2024.00786"},{"key":"20_CR36","unstructured":"Webster, R.: A reproducible extraction of training images from diffusion models. arXiv preprint arXiv:2305.08694 (2023)"},{"key":"20_CR37","unstructured":"Webster, R., Rabin, J., Simon, L., Jurie, F.: On the de-duplication of laion-2b. arXiv preprint arXiv:2303.12733 (2023)"},{"key":"20_CR38","doi-asserted-by":"crossref","unstructured":"Wei, X., Zhang, T., Li, Y., Zhang, Y., Wu, F.: Multi-modality cross attention network for image and sentence matching. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10941\u201310950 (2020)","DOI":"10.1109\/CVPR42600.2020.01095"},{"key":"20_CR39","doi-asserted-by":"crossref","unstructured":"Wei, Y., Zhang, Y., Ji, Z., Bai, J., Zhang, L., Zuo, W.: Elite: encoding visual concepts into textual embeddings for customized text-to-image generation. arXiv preprint arXiv:2302.13848 (2023)","DOI":"10.1109\/ICCV51070.2023.01461"},{"key":"20_CR40","unstructured":"Wen, Y., Liu, Y., Chen, C., Lyu, L.: Detecting, explaining, and mitigating memorization in diffusion models. In: The Twelfth International Conference on Learning Representations (2024)"},{"key":"20_CR41","unstructured":"Zhai, S., et al.: Stabilizing transformer training by preventing attention entropy collapse. In: Krause, A., Brunskill, E., Cho, K., Engelhardt, B., Sabato, S., Scarlett, J. (eds.) Proceedings of the 40th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol.\u00a0202, pp. 40770\u201340803. PMLR (2023). https:\/\/proceedings.mlr.press\/v202\/zhai23a.html"},{"key":"20_CR42","unstructured":"Zhang, C., Ippolito, D., Lee, K., Jagielski, M., Tram\u00e8r, F., Carlini, N.: Counterfactual memorization in neural language models. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72980-5_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T09:23:19Z","timestamp":1730107399000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72980-5_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031729799","9783031729805"],"references-count":42,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72980-5_20","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"29 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}