{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,22]],"date-time":"2026-01-22T00:24:13Z","timestamp":1769041453699,"version":"3.49.0"},"publisher-location":"Cham","reference-count":38,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032093202","type":"print"},{"value":"9783032093219","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,11,8]],"date-time":"2025-11-08T00:00:00Z","timestamp":1762560000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,11,8]],"date-time":"2025-11-08T00:00:00Z","timestamp":1762560000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-09321-9_21","type":"book-chapter","created":{"date-parts":[[2025,11,7]],"date-time":"2025-11-07T04:58:04Z","timestamp":1762491484000},"page":"307-321","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["FaR: Enhancing Multi-concept Text-to-Image Diffusion via\u00a0Concept Fusion and\u00a0Localized Refinement"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-1889-1465","authenticated-orcid":false,"given":"Gia-Nghia","family":"Tran","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0007-7477-4702","authenticated-orcid":false,"given":"Quang-Huy","family":"Che","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0005-1549-989X","authenticated-orcid":false,"given":"Trong-Tai 
Dam","family":"Vu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0008-3031-1711","authenticated-orcid":false,"given":"Bich-Nga","family":"Pham","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4260-7874","authenticated-orcid":false,"given":"Vinh-Tiep","family":"Nguyen","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7363-2610","authenticated-orcid":false,"given":"Trung-Nghia","family":"Le","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3046-3041","authenticated-orcid":false,"given":"Minh-Triet","family":"Tran","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,11,8]]},"reference":[{"key":"21_CR1","doi-asserted-by":"crossref","unstructured":"Avrahami, O., Aberman, K., Fried, O., Cohen-Or, D., Lischinski, D.: Break-a-scene: extracting multiple concepts from a single image. In: SIGGRAPH Asia 2023 Conference Papers, pp. 1\u201312 (2023)","DOI":"10.1145\/3610548.3618154"},{"key":"21_CR2","unstructured":"Chae, D., Park, N., Kim, J., Lee, K.: InstructBooth: instruction-following personalized text-to-image generation. arXiv preprint arXiv:2312.03011 (2023)"},{"issue":"4","key":"21_CR3","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3592116","volume":"42","author":"H Chefer","year":"2023","unstructured":"Chefer, H., Alaluf, Y., Vinker, Y., Wolf, L., Cohen-Or, D.: Attend-and-excite: attention-based semantic guidance for text-to-image diffusion models. ACM Trans. Graphics (TOG) 42(4), 1\u201310 (2023)","journal-title":"ACM Trans. Graphics (TOG)"},{"key":"21_CR4","doi-asserted-by":"crossref","unstructured":"Ding, G., et al.: FreeCustom: tuning-free customized image generation for multi-concept composition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 
9089\u20139098 (2024)","DOI":"10.1109\/CVPR52733.2024.00868"},{"key":"21_CR5","unstructured":"Feng, W., et al.: Training-free structured diffusion guidance for compositional text-to-image synthesis. arXiv preprint arXiv:2212.05032 (2022)"},{"key":"21_CR6","unstructured":"Fu, S., et al.: DreamSim: learning new dimensions of human visual similarity using synthetic data. arXiv preprint arXiv:2306.09344 (2023)"},{"key":"21_CR7","unstructured":"Gal, R., et al.: An image is worth one word: personalizing text-to-image generation using textual inversion. arXiv preprint arXiv:2208.01618 (2022)"},{"key":"21_CR8","unstructured":"Gu, Y., et\u00a0al.: Mix-of-show: decentralized low-rank adaptation for multi-concept customization of diffusion models. Adv. Neural. Inf. Process. Syst. 36 (2024)"},{"key":"21_CR9","doi-asserted-by":"crossref","unstructured":"Han, L., Li, Y., Zhang, H., Milanfar, P., Metaxas, D., Yang, F.: SVDiff: compact parameter space for diffusion fine-tuning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7323\u20137334 (2023)","DOI":"10.1109\/ICCV51070.2023.00673"},{"key":"21_CR10","unstructured":"Hertz, A., Mokady, R., Tenenbaum, J., Aberman, K., Pritch, Y., Cohen-Or, D.: Prompt-to-prompt image editing with cross attention control. arXiv preprint arXiv:2208.01626 (2022)"},{"key":"21_CR11","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. Adv. Neural. Inf. Process. Syst. 33, 6840\u20136851 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"21_CR12","unstructured":"Hu, E.J., et al.: LoRA: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)"},{"key":"21_CR13","unstructured":"Jang, S., Jo, J., Lee, K., Hwang, S.J.: Identity decoupling for multi-subject personalization of text-to-image models. 
arXiv preprint arXiv:2404.04243 (2024)"},{"key":"21_CR14","doi-asserted-by":"crossref","unstructured":"Kim, Y., Lee, J., Kim, J.H., Ha, J.W., Zhu, J.Y.: Dense text-to-image generation with attention modulation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7701\u20137711 (2023)","DOI":"10.1109\/ICCV51070.2023.00708"},{"key":"21_CR15","doi-asserted-by":"crossref","unstructured":"Kumari, N., Zhang, B., Zhang, R., Shechtman, E., Zhu, J.Y.: Multi-concept customization of text-to-image diffusion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1931\u20131941 (2023)","DOI":"10.1109\/CVPR52729.2023.00192"},{"key":"21_CR16","doi-asserted-by":"crossref","unstructured":"Kwon, G., Jenni, S., Li, D., Lee, J.Y., Ye, J.C., Heilbron, F.C.: Concept weaver: enabling multi-concept fusion in text-to-image models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8880\u20138889 (2024)","DOI":"10.1109\/CVPR52733.2024.00848"},{"key":"21_CR17","unstructured":"Li, L., Zeng, H., Yang, C., Jia, H., Xu, D.: Block-wise LoRA: revisiting fine-grained LoRA for effective personalization and stylization in text-to-image generation. arXiv preprint arXiv:2403.07500 (2024)"},{"key":"21_CR18","doi-asserted-by":"crossref","unstructured":"Li, Y., et al.: GLIGEN: open-set grounded text-to-image generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22511\u201322521 (2023)","DOI":"10.1109\/CVPR52729.2023.02156"},{"key":"21_CR19","unstructured":"Li, Y., Keuper, M., Zhang, D., Khoreva, A.: Divide & bind your attention for improved generative semantic nursing. In: 34th British Machine Vision Conference 2023, BMVC 2023 (2023)"},{"key":"21_CR20","unstructured":"Liu, Z., et al.: Cones: concept neurons in diffusion models for customized generation. 
arXiv preprint arXiv:2303.05125 (2023)"},{"key":"21_CR21","unstructured":"Liu, Z., et al.: Cones 2: customizable image synthesis with multiple subjects. In: Proceedings of the 37th International Conference on Neural Information Processing Systems, pp. 57500\u201357519 (2023)"},{"key":"21_CR22","doi-asserted-by":"crossref","unstructured":"Ma, J., Liang, J., Chen, C., Lu, H.: Subject-diffusion: open domain personalized text-to-image generation without test-time fine-tuning. In: ACM SIGGRAPH 2024 Conference Papers, pp. 1\u201312 (2024)","DOI":"10.1145\/3641519.3657469"},{"key":"21_CR23","unstructured":"Oquab, M., et\u00a0al.: DINOv2: learning robust visual features without supervision. arXiv preprint arXiv:2304.07193 (2023)"},{"key":"21_CR24","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"21_CR25","unstructured":"Rassin, R., Hirsch, E., Glickman, D., Ravfogel, S., Goldberg, Y., Chechik, G.: Linguistic binding in diffusion models: enhancing attribute correspondence through attention map alignment. Adv. Neural Inf. Process. Syst. 36 (2024)"},{"key":"21_CR26","unstructured":"Ren, T., et\u00a0al.: Grounded SAM: assembling open-world models for diverse visual tasks. arXiv preprint arXiv:2401.14159 (2024)"},{"key":"21_CR27","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"21_CR28","doi-asserted-by":"crossref","unstructured":"Ruiz, N., Li, Y., Jampani, V., Pritch, Y., Rubinstein, M., Aberman, K.: DreamBooth: fine tuning text-to-image diffusion models for subject-driven generation. 
In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22500\u201322510 (2023)","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"21_CR29","doi-asserted-by":"crossref","unstructured":"Sauer, A., Boesel, F., Dockhorn, T., Blattmann, A., Esser, P., Rombach, R.: Fast high-resolution image synthesis with latent adversarial diffusion distillation. arXiv preprint arXiv:2403.12015 (2024)","DOI":"10.1145\/3680528.3687625"},{"key":"21_CR30","unstructured":"Shen, G., et\u00a0al.: SG-adapter: enhancing text-to-image generation with scene graph guidance. arXiv preprint arXiv:2405.15321 (2024)"},{"key":"21_CR31","doi-asserted-by":"crossref","unstructured":"Wang, J., Chan, K.C., Loy, C.C.: Exploring clip for assessing the look and feel of images. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a037, pp. 2555\u20132563 (2023)","DOI":"10.1609\/aaai.v37i2.25353"},{"key":"21_CR32","doi-asserted-by":"crossref","unstructured":"Wang, X., Darrell, T., Rambhatla, S.S., Girdhar, R., Misra, I.: InstanceDiffusion: instance-level control for image generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6232\u20136242 (2024)","DOI":"10.1109\/CVPR52733.2024.00596"},{"key":"21_CR33","unstructured":"Xu, J., et al.: ImageReward: learning and evaluating human preferences for text-to-image generation. Adv. Neural Inf. Process. Syst. 36 (2024)"},{"key":"21_CR34","unstructured":"Yang, Y., et\u00a0al.: LoRA-composer: leveraging low-rank adaptation for multi-concept customization in training-free diffusion models. arXiv preprint arXiv:2403.11627 (2024)"},{"key":"21_CR35","doi-asserted-by":"crossref","unstructured":"Yao, Z., Feng, F., Li, R., Wang, X.: Concept conductor: orchestrating multiple personalized concepts in text-to-image synthesis. 
arXiv preprint arXiv:2408.03632 (2024)","DOI":"10.1609\/aaai.v39i9.33021"},{"key":"21_CR36","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"310","DOI":"10.1007\/978-3-031-72983-6_18","volume-title":"Computer Vision \u2013 ECCV 2024","author":"B Zhang","year":"2025","unstructured":"Zhang, B., Zhang, P., Dong, X., Zang, Y., Wang, J.: Long-CLIP: unlocking the long-text capability of CLIP. In: Leonardis, A., Ricci, E., Roth, S., Russakovsky, O., Sattler, T., Varol, G. (eds.) ECCV 2024. LNCS, vol. 15109, pp. 310\u2013325. Springer, Cham (2025). https:\/\/doi.org\/10.1007\/978-3-031-72983-6_18"},{"key":"21_CR37","doi-asserted-by":"crossref","unstructured":"Zhang, L., Rao, A., Agrawala, M.: Adding conditional control to text-to-image diffusion models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3836\u20133847 (2023)","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"21_CR38","doi-asserted-by":"crossref","unstructured":"Zheng, G., Zhou, X., Li, X., Qi, Z., Shan, Y., Li, X.: LayoutDiffusion: controllable diffusion model for layout-to-image generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 
22490\u201322499 (2023)","DOI":"10.1109\/CVPR52729.2023.02154"}],"container-title":["Lecture Notes in Computer Science","Computational Collective Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-09321-9_21","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,7]],"date-time":"2025-11-07T04:58:10Z","timestamp":1762491490000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-09321-9_21"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,8]]},"ISBN":["9783032093202","9783032093219"],"references-count":38,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-09321-9_21","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,11,8]]},"assertion":[{"value":"8 November 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICCCI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Computational Collective Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Ho Chi Minh City","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vietnam","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 November 
2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 November 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iccci2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/iccci.pwr.edu.pl\/2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}